xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision ff1a2e19e92ca3eb486258317b2f290407e2b2d1)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if(!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If NumCol = 1 then a copy is not required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++  = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend){
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         }
313         else if (in[j] < 0) continue;
314 #if defined(PETSC_USE_DEBUG)
315         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
316 #endif
317         else {
318           if (mat->was_assembled) {
319             if (!baij->colmap) {
320               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
321             }
322 
323 #if defined(PETSC_USE_DEBUG)
324 #if defined (PETSC_USE_CTABLE)
325             { PetscInt data;
326               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
327               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328             }
329 #else
330             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
331 #endif
332 #endif
333 #if defined (PETSC_USE_CTABLE)
334 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
335             col  = (col - 1)/bs;
336 #else
337             col = (baij->colmap[in[j]] - 1)/bs;
338 #endif
339             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
340               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
341               col =  in[j];
342             }
343           }
344           else col = in[j];
345           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
346         }
347       }
348     } else {
349       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
350       if (!baij->donotstash) {
351         if (roworiented) {
352           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         } else {
354           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         }
356       }
357     }
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetValues_MPISBAIJ"
364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
365 {
366   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
367   PetscErrorCode ierr;
368   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
369   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
370 
371   PetscFunctionBegin;
372   for (i=0; i<m; i++) {
373     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
374     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
375     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
376       row = idxm[i] - bsrstart;
377       for (j=0; j<n; j++) {
378         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
379         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
380         if (idxn[j] >= bscstart && idxn[j] < bscend){
381           col = idxn[j] - bscstart;
382           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
383         } else {
384           if (!baij->colmap) {
385             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
386           }
387 #if defined (PETSC_USE_CTABLE)
388           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
389           data --;
390 #else
391           data = baij->colmap[idxn[j]/bs]-1;
392 #endif
393           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
394           else {
395             col  = data + idxn[j]%bs;
396             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
397           }
398         }
399       }
400     } else {
401       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
402     }
403   }
404  PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
426       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++){
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++){
446             for (row=0; row<bs; row++){
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* non-diagonal block */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++){
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++){
463             for (row=0; row<bs; row++){
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else {
478       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
479     }
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
487 {
488   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
489   PetscErrorCode ierr;
490   PetscInt       nstash,reallocs;
491   InsertMode     addv;
492 
493   PetscFunctionBegin;
494   if (baij->donotstash || mat->nooffprocentries) {
495     PetscFunctionReturn(0);
496   }
497 
498   /* make sure all processors are either in INSERTMODE or ADDMODE */
499   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
500   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
501   mat->insertmode = addv; /* in case this processor had no cache */
502 
503   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
504   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
514 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
515 {
516   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
517   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
518   PetscErrorCode ierr;
519   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
520   PetscInt       *row,*col;
521   PetscBool      other_disassembled;
522   PetscMPIInt    n;
523   PetscBool      r1,r2,r3;
524   MatScalar      *val;
525   InsertMode     addv = mat->insertmode;
526 
527   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
528   PetscFunctionBegin;
529 
530   if (!baij->donotstash &&  !mat->nooffprocentries) {
531     while (1) {
532       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
533       if (!flg) break;
534 
535       for (i=0; i<n;) {
536         /* Now identify the consecutive vals belonging to the same row */
537         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
538         if (j < n) ncols = j-i;
539         else       ncols = n-i;
540         /* Now assemble all these values with a single function call */
541         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
542         i = j;
543       }
544     }
545     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
546     /* Now process the block-stash. Since the values are stashed column-oriented,
547        set the roworiented flag to column oriented, and after MatSetValues()
548        restore the original flags */
549     r1 = baij->roworiented;
550     r2 = a->roworiented;
551     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
552     baij->roworiented = PETSC_FALSE;
553     a->roworiented    = PETSC_FALSE;
554     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
555     while (1) {
556       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
557       if (!flg) break;
558 
559       for (i=0; i<n;) {
560         /* Now identify the consecutive vals belonging to the same row */
561         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
562         if (j < n) ncols = j-i;
563         else       ncols = n-i;
564         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
565         i = j;
566       }
567     }
568     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
569     baij->roworiented = r1;
570     a->roworiented    = r2;
571     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
572   }
573 
574   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
576 
577   /* determine if any processor has disassembled, if so we must
578      also disassemble ourselfs, in order that we may reassemble. */
579   /*
580      if nonzero structure of submatrix B cannot change then we know that
581      no processor disassembled thus we can skip this stuff
582   */
583   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
584     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
585     if (mat->was_assembled && !other_disassembled) {
586       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
587     }
588   }
589 
590   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
591     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
592   }
593   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
594   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
595   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
596 
597   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
598   baij->rowvalues = 0;
599 
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #undef __FUNCT__
605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
607 {
608   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode    ierr;
610   PetscInt          bs = mat->rmap->bs;
611   PetscMPIInt       size = baij->size,rank = baij->rank;
612   PetscBool         iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
618   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
619   if (iascii) {
620     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
621     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
622       MatInfo info;
623       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
624       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
633       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
634       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_INFO) {
637       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
640       PetscFunctionReturn(0);
641     }
642   }
643 
644   if (isdraw) {
645     PetscDraw  draw;
646     PetscBool  isnull;
647     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
648     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
649   }
650 
651   if (size == 1) {
652     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
653     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
654   } else {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
671     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
676     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++; a += bs;
687         }
688       }
689     }
690     /* copy over the B part */
691     Bloc = (Mat_SeqBAIJ*)baij->B->data;
692     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
693     for (i=0; i<mbs; i++) {
694 
695       rvals[0] = bs*(baij->rstartbs + i);
696       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
697       for (j=ai[i]; j<ai[i+1]; j++) {
698         col = baij->garray[aj[j]]*bs;
699         for (k=0; k<bs; k++) {
700           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
701           col++; a += bs;
702         }
703       }
704     }
705     ierr = PetscFree(rvals);CHKERRQ(ierr);
706     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     /*
709        Everyone has to call to draw the matrix since the graphics waits are
710        synchronized across all processors that share the PetscDraw object
711     */
712     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
713     if (!rank) {
714       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
715           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
716       PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);
717       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
718     }
719     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
720     ierr = MatDestroy(&A);CHKERRQ(ierr);
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPISBAIJ"
727 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
728 {
729   PetscErrorCode ierr;
730   PetscBool      iascii,isdraw,issocket,isbinary;
731 
732   PetscFunctionBegin;
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
735   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
736   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
737   if (iascii || isdraw || issocket || isbinary) {
738     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
739   } else {
740     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatDestroy_MPISBAIJ"
747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748 {
749   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755 #endif
756   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
757   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
760 #if defined (PETSC_USE_CTABLE)
761   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
762 #else
763   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
764 #endif
765   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
773   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
776   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
780 #if defined(PETSC_USE_REAL_MAT_SINGLE)
781   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
782 #endif
783   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
786   ierr = PetscFree(mat->data);CHKERRQ(ierr);
787 
788   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* supperdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar    *x,*from;
840 
841   PetscFunctionBegin;
842   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
843   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844 
845   /* diagonal part */
846   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
847   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
848 
849   /* subdiagonal part */
850   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
851 
852   /* copy x into the vec slvec0 */
853   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
857   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   /* supperdiagonal part */
863   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
869 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870 {
871   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
872   PetscErrorCode ierr;
873   PetscInt       nt;
874 
875   PetscFunctionBegin;
876   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
877   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
878 
879   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
880   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
881 
882   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   /* do diagonal part */
884   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
885   /* do supperdiagonal part */
886   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
888   /* do subdiagonal part */
889   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
890   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892 
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(((PetscObject)A)->comm);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* supperdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929 
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
935 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   /* do diagonal part */
943   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
944   /* do supperdiagonal part */
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
947 
948   /* do subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952 
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock A->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) { max = tmp; }
1011     }
1012     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
1025   cmap  = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart)
1049               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1050             else break;
1051           }
1052           imark = i;
1053         }
1054         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1055         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1056       }
1057     } else {
1058       if (idx) *idx = 0;
1059       if (v)   *v   = 0;
1060     }
1061   }
1062   *nz = nztot;
1063   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1064   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1076   baij->getrowactive = PETSC_FALSE;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1082 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1083 {
1084   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1085   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1086 
1087   PetscFunctionBegin;
1088   aA->getrow_utriangular = PETSC_TRUE;
1089   PetscFunctionReturn(0);
1090 }
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1093 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1097 
1098   PetscFunctionBegin;
1099   aA->getrow_utriangular = PETSC_FALSE;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1105 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1106 {
1107   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1112   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1118 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1119 {
1120   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1125   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1131 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1132 {
1133   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1138   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1144 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1147   Mat            A = a->A,B = a->B;
1148   PetscErrorCode ierr;
1149   PetscReal      isend[5],irecv[5];
1150 
1151   PetscFunctionBegin;
1152   info->block_size     = (PetscReal)matin->rmap->bs;
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   } else if (flag == MAT_GLOBAL_SUM) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else {
1180     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1181   }
1182   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1183   info->fill_ratio_needed = 0;
1184   info->factor_mallocs    = 0;
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1190 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1191 {
1192   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1193   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   switch (op) {
1198   case MAT_NEW_NONZERO_LOCATIONS:
1199   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201   case MAT_KEEP_NONZERO_PATTERN:
1202   case MAT_NEW_NONZERO_LOCATION_ERR:
1203     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = flg;
1208     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1210     break;
1211   case MAT_NEW_DIAGONALS:
1212     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1213     break;
1214   case MAT_IGNORE_OFF_PROC_ENTRIES:
1215     a->donotstash = flg;
1216     break;
1217   case MAT_USE_HASH_TABLE:
1218     a->ht_flag = flg;
1219     break;
1220   case MAT_HERMITIAN:
1221     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1224     break;
1225   case MAT_SPD:
1226     A->spd_set                         = PETSC_TRUE;
1227     A->spd                             = flg;
1228     if (flg) {
1229       A->symmetric                     = PETSC_TRUE;
1230       A->structurally_symmetric        = PETSC_TRUE;
1231       A->symmetric_set                 = PETSC_TRUE;
1232       A->structurally_symmetric_set    = PETSC_TRUE;
1233     }
1234     break;
1235   case MAT_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_STRUCTURALLY_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_SYMMETRY_ETERNAL:
1242     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1243     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1244     break;
1245   case MAT_IGNORE_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_ERROR_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_GETROW_UPPERTRIANGULAR:
1252     aA->getrow_utriangular = flg;
1253     break;
1254   default:
1255     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265   PetscFunctionBegin;
1266   if (MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1274 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1275 {
1276   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1277   Mat            a=baij->A, b=baij->B;
1278   PetscErrorCode ierr;
1279   PetscInt       nv,m,n;
1280   PetscBool      flg;
1281 
1282   PetscFunctionBegin;
1283   if (ll != rr){
1284     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1285     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1286   }
1287   if (!ll) PetscFunctionReturn(0);
1288 
1289   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1290   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1291 
1292   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1293   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1294 
1295   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296 
1297   /* left diagonalscale the off-diagonal part */
1298   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1299 
1300   /* scale the diagonal part */
1301   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1302 
1303   /* right diagonalscale the off-diagonal part */
1304   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1311 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatEqual_MPISBAIJ"
1325 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1326 {
1327   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1328   Mat            a,b,c,d;
1329   PetscBool      flg;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   a = matA->A; b = matA->B;
1334   c = matB->A; d = matB->B;
1335 
1336   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1337   if (flg) {
1338     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1339   }
1340   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCopy_MPISBAIJ"
1346 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1347 {
1348   PetscErrorCode ierr;
1349   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1350   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1351 
1352   PetscFunctionBegin;
1353   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1354   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1355     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1356     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1357     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1358   } else {
1359     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1360     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1367 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1368 {
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
1388     PetscScalar alpha = a;
1389     xa = (Mat_SeqSBAIJ *)xx->A->data;
1390     ya = (Mat_SeqSBAIJ *)yy->A->data;
1391     bnz = PetscBLASIntCast(xa->nz);
1392     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1393     xb = (Mat_SeqBAIJ *)xx->B->data;
1394     yb = (Mat_SeqBAIJ *)yy->B->data;
1395     bnz = PetscBLASIntCast(xb->nz);
1396     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1397   } else {
1398     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1407 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1408 {
1409   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1410   PetscInt        rbs,cbs;
1411   PetscErrorCode  ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1415   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1416   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1417   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1418   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
1419   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1425 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1426 {
1427   PetscErrorCode ierr;
1428   PetscInt       i;
1429   PetscBool      flg;
1430 
1431   PetscFunctionBegin;
1432   for (i=0; i<n; i++) {
1433     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1434     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1435   }
1436   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 
1441 /* -------------------------------------------------------------------*/
1442 static struct _MatOps MatOps_Values = {
1443        MatSetValues_MPISBAIJ,
1444        MatGetRow_MPISBAIJ,
1445        MatRestoreRow_MPISBAIJ,
1446        MatMult_MPISBAIJ,
1447 /* 4*/ MatMultAdd_MPISBAIJ,
1448        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1449        MatMultAdd_MPISBAIJ,
1450        0,
1451        0,
1452        0,
1453 /*10*/ 0,
1454        0,
1455        0,
1456        MatSOR_MPISBAIJ,
1457        MatTranspose_MPISBAIJ,
1458 /*15*/ MatGetInfo_MPISBAIJ,
1459        MatEqual_MPISBAIJ,
1460        MatGetDiagonal_MPISBAIJ,
1461        MatDiagonalScale_MPISBAIJ,
1462        MatNorm_MPISBAIJ,
1463 /*20*/ MatAssemblyBegin_MPISBAIJ,
1464        MatAssemblyEnd_MPISBAIJ,
1465        MatSetOption_MPISBAIJ,
1466        MatZeroEntries_MPISBAIJ,
1467 /*24*/ 0,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*29*/ MatSetUp_MPISBAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477 /*34*/ MatDuplicate_MPISBAIJ,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*39*/ MatAXPY_MPISBAIJ,
1483        MatGetSubMatrices_MPISBAIJ,
1484        MatIncreaseOverlap_MPISBAIJ,
1485        MatGetValues_MPISBAIJ,
1486        MatCopy_MPISBAIJ,
1487 /*44*/ 0,
1488        MatScale_MPISBAIJ,
1489        0,
1490        0,
1491        0,
1492 /*49*/ MatSetBlockSize_MPISBAIJ,
1493        0,
1494        0,
1495        0,
1496        0,
1497 /*54*/ 0,
1498        0,
1499        MatSetUnfactored_MPISBAIJ,
1500        0,
1501        MatSetValuesBlocked_MPISBAIJ,
1502 /*59*/ 0,
1503        0,
1504        0,
1505        0,
1506        0,
1507 /*64*/ 0,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1513        0,
1514        0,
1515        0,
1516        0,
1517 /*74*/ 0,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*79*/ 0,
1523        0,
1524        0,
1525        0,
1526        MatLoad_MPISBAIJ,
1527 /*84*/ 0,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*89*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*94*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*99*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*104*/0,
1548        MatRealPart_MPISBAIJ,
1549        MatImaginaryPart_MPISBAIJ,
1550        MatGetRowUpperTriangular_MPISBAIJ,
1551        MatRestoreRowUpperTriangular_MPISBAIJ,
1552 /*109*/0,
1553        0,
1554        0,
1555        0,
1556        0,
1557 /*114*/0,
1558        0,
1559        0,
1560        0,
1561        0,
1562 /*119*/0,
1563        0,
1564        0,
1565        0
1566 };
1567 
1568 
1569 EXTERN_C_BEGIN
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1572 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1573 {
1574   PetscFunctionBegin;
1575   *a = ((Mat_MPISBAIJ *)A->data)->A;
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1583 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1584 {
1585   Mat_MPISBAIJ   *b;
1586   PetscErrorCode ierr;
1587   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1588 
1589   PetscFunctionBegin;
1590   if (bs < 0){
1591     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1592       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1593     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1594     bs   = PetscAbs(bs);
1595   }
1596   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1597   bs = newbs;
1598 
1599   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1600   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1601   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1602   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1603 
1604   B->rmap->bs = B->cmap->bs = bs;
1605   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1606   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1607 
1608   if (d_nnz) {
1609     for (i=0; i<B->rmap->n/bs; i++) {
1610       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1611     }
1612   }
1613   if (o_nnz) {
1614     for (i=0; i<B->rmap->n/bs; i++) {
1615       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1616     }
1617   }
1618 
1619   b   = (Mat_MPISBAIJ*)B->data;
1620   mbs = B->rmap->n/bs;
1621   Mbs = B->rmap->N/bs;
1622   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1623 
1624   B->rmap->bs  = bs;
1625   b->bs2 = bs*bs;
1626   b->mbs = mbs;
1627   b->nbs = mbs;
1628   b->Mbs = Mbs;
1629   b->Nbs = Mbs;
1630 
1631   for (i=0; i<=b->size; i++) {
1632     b->rangebs[i] = B->rmap->range[i]/bs;
1633   }
1634   b->rstartbs = B->rmap->rstart/bs;
1635   b->rendbs   = B->rmap->rend/bs;
1636 
1637   b->cstartbs = B->cmap->rstart/bs;
1638   b->cendbs   = B->cmap->rend/bs;
1639 
1640   if (!B->preallocated) {
1641     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1642     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1643     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1644     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1645     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1646     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1647     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1648     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1649     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1650   }
1651 
1652   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1653   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1654   B->preallocated = PETSC_TRUE;
1655   PetscFunctionReturn(0);
1656 }
1657 EXTERN_C_END
1658 
1659 EXTERN_C_BEGIN
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1662 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1663 {
1664   PetscInt       m,rstart,cstart,cend;
1665   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1666   const PetscInt *JJ=0;
1667   PetscScalar    *values=0;
1668   PetscErrorCode ierr;
1669 
1670   PetscFunctionBegin;
1671 
1672   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1673   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1674   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1675   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1676   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1677   m      = B->rmap->n/bs;
1678   rstart = B->rmap->rstart/bs;
1679   cstart = B->cmap->rstart/bs;
1680   cend   = B->cmap->rend/bs;
1681 
1682   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1683   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1684   for (i=0; i<m; i++) {
1685     nz = ii[i+1] - ii[i];
1686     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1687     nz_max = PetscMax(nz_max,nz);
1688     JJ  = jj + ii[i];
1689     for (j=0; j<nz; j++) {
1690       if (*JJ >= cstart) break;
1691       JJ++;
1692     }
1693     d = 0;
1694     for (; j<nz; j++) {
1695       if (*JJ++ >= cend) break;
1696       d++;
1697     }
1698     d_nnz[i] = d;
1699     o_nnz[i] = nz - d;
1700   }
1701   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1702   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1703 
1704   values = (PetscScalar*)V;
1705   if (!values) {
1706     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1707     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1708   }
1709   for (i=0; i<m; i++) {
1710     PetscInt          row    = i + rstart;
1711     PetscInt          ncols  = ii[i+1] - ii[i];
1712     const PetscInt    *icols = jj + ii[i];
1713     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1714     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1715   }
1716 
1717   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1718   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1719   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 EXTERN_C_BEGIN
1726 #if defined(PETSC_HAVE_MUMPS)
1727 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1728 #endif
1729 #if defined(PETSC_HAVE_SPOOLES)
1730 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1731 #endif
1732 #if defined(PETSC_HAVE_PASTIX)
1733 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1734 #endif
1735 EXTERN_C_END
1736 
1737 /*MC
1738    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1739    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1740    the matrix is stored.
1741 
1742   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1743   can call MatSetOption(Mat, MAT_HERMITIAN);
1744 
1745    Options Database Keys:
1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1747 
1748   Level: beginner
1749 
1750 .seealso: MatCreateMPISBAIJ
1751 M*/
1752 
1753 EXTERN_C_BEGIN
1754 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*);
1755 EXTERN_C_END
1756 
1757 EXTERN_C_BEGIN
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatCreate_MPISBAIJ"
1760 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1761 {
1762   Mat_MPISBAIJ   *b;
1763   PetscErrorCode ierr;
1764   PetscBool      flg;
1765 
1766   PetscFunctionBegin;
1767 
1768   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1769   B->data = (void*)b;
1770   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1771 
1772   B->ops->destroy    = MatDestroy_MPISBAIJ;
1773   B->ops->view       = MatView_MPISBAIJ;
1774   B->assembled       = PETSC_FALSE;
1775 
1776   B->insertmode = NOT_SET_VALUES;
1777   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1778   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1779 
1780   /* build local table of row and column ownerships */
1781   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1782 
1783   /* build cache for off array entries formed */
1784   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1785   b->donotstash  = PETSC_FALSE;
1786   b->colmap      = PETSC_NULL;
1787   b->garray      = PETSC_NULL;
1788   b->roworiented = PETSC_TRUE;
1789 
1790   /* stuff used in block assembly */
1791   b->barray       = 0;
1792 
1793   /* stuff used for matrix vector multiply */
1794   b->lvec         = 0;
1795   b->Mvctx        = 0;
1796   b->slvec0       = 0;
1797   b->slvec0b      = 0;
1798   b->slvec1       = 0;
1799   b->slvec1a      = 0;
1800   b->slvec1b      = 0;
1801   b->sMvctx       = 0;
1802 
1803   /* stuff for MatGetRow() */
1804   b->rowindices   = 0;
1805   b->rowvalues    = 0;
1806   b->getrowactive = PETSC_FALSE;
1807 
1808   /* hash table stuff */
1809   b->ht           = 0;
1810   b->hd           = 0;
1811   b->ht_size      = 0;
1812   b->ht_flag      = PETSC_FALSE;
1813   b->ht_fact      = 0;
1814   b->ht_total_ct  = 0;
1815   b->ht_insert_ct = 0;
1816 
1817   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1818   b->ijonly       = PETSC_FALSE;
1819 
1820   b->in_loc       = 0;
1821   b->v_loc        = 0;
1822   b->n_loc        = 0;
1823   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1824     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1825     if (flg) {
1826       PetscReal fact = 1.39;
1827       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1828       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1829       if (fact <= 1.0) fact = 1.39;
1830       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1831       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1832     }
1833   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1834 
1835 #if defined(PETSC_HAVE_PASTIX)
1836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1837 					   "MatGetFactor_mpisbaij_pastix",
1838 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1839 #endif
1840 #if defined(PETSC_HAVE_MUMPS)
1841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1842                                      "MatGetFactor_sbaij_mumps",
1843                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1844 #endif
1845 #if defined(PETSC_HAVE_SPOOLES)
1846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1847                                      "MatGetFactor_mpisbaij_spooles",
1848                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1849 #endif
1850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1851                                      "MatStoreValues_MPISBAIJ",
1852                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1854                                      "MatRetrieveValues_MPISBAIJ",
1855                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1857                                      "MatGetDiagonalBlock_MPISBAIJ",
1858                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1860                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1861                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1863                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1864                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1866                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1867                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1868 
1869   B->symmetric                  = PETSC_TRUE;
1870   B->structurally_symmetric     = PETSC_TRUE;
1871   B->symmetric_set              = PETSC_TRUE;
1872   B->structurally_symmetric_set = PETSC_TRUE;
1873   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 EXTERN_C_END
1877 
1878 /*MC
1879    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1880 
1881    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1882    and MATMPISBAIJ otherwise.
1883 
1884    Options Database Keys:
1885 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1886 
1887   Level: beginner
1888 
1889 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1890 M*/
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1894 /*@C
1895    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1896    the user should preallocate the matrix storage by setting the parameters
1897    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1898    performance can be increased by more than a factor of 50.
1899 
1900    Collective on Mat
1901 
1902    Input Parameters:
1903 +  A - the matrix
1904 .  bs   - size of blockk
1905 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1906            submatrix  (same for all local rows)
1907 .  d_nnz - array containing the number of block nonzeros in the various block rows
1908            in the upper triangular and diagonal part of the in diagonal portion of the local
1909            (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1910            for the diagonal entry and set a value even if it is zero.
1911 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1912            submatrix (same for all local rows).
1913 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1914            off-diagonal portion of the local submatrix that is right of the diagonal
1915            (possibly different for each block row) or PETSC_NULL.
1916 
1917 
1918    Options Database Keys:
1919 .   -mat_no_unroll - uses code that does not unroll the loops in the
1920                      block calculations (much slower)
1921 .   -mat_block_size - size of the blocks to use
1922 
1923    Notes:
1924 
1925    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1926    than it must be used on all processors that share the object for that argument.
1927 
1928    If the *_nnz parameter is given then the *_nz parameter is ignored
1929 
1930    Storage Information:
1931    For a square global matrix we define each processor's diagonal portion
1932    to be its local rows and the corresponding columns (a square submatrix);
1933    each processor's off-diagonal portion encompasses the remainder of the
1934    local matrix (a rectangular submatrix).
1935 
1936    The user can specify preallocated storage for the diagonal part of
1937    the local submatrix with either d_nz or d_nnz (not both).  Set
1938    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1939    memory allocation.  Likewise, specify preallocated storage for the
1940    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1941 
1942    You can call MatGetInfo() to get information on how effective the preallocation was;
1943    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1944    You can also run with the option -info and look for messages with the string
1945    malloc in them to see if additional memory allocation was needed.
1946 
1947    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1948    the figure below we depict these three local rows and all columns (0-11).
1949 
1950 .vb
1951            0 1 2 3 4 5 6 7 8 9 10 11
1952           -------------------
1953    row 3  |  . . . d d d o o o o o o
1954    row 4  |  . . . d d d o o o o o o
1955    row 5  |  . . . d d d o o o o o o
1956           -------------------
1957 .ve
1958 
1959    Thus, any entries in the d locations are stored in the d (diagonal)
1960    submatrix, and any entries in the o locations are stored in the
1961    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1962    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1963 
1964    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1965    plus the diagonal part of the d matrix,
1966    and o_nz should indicate the number of block nonzeros per row in the o matrix
1967 
1968    In general, for PDE problems in which most nonzeros are near the diagonal,
1969    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1970    or you will get TERRIBLE performance; see the users' manual chapter on
1971    matrices.
1972 
1973    Level: intermediate
1974 
1975 .keywords: matrix, block, aij, compressed row, sparse, parallel
1976 
1977 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1978 @*/
1979 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1980 {
1981   PetscErrorCode ierr;
1982 
1983   PetscFunctionBegin;
1984   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1985   PetscValidType(B,1);
1986   PetscValidLogicalCollectiveInt(B,bs,2);
1987   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatCreateMPISBAIJ"
1993 /*@C
1994    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1995    (block compressed row).  For good matrix assembly performance
1996    the user should preallocate the matrix storage by setting the parameters
1997    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1998    performance can be increased by more than a factor of 50.
1999 
2000    Collective on MPI_Comm
2001 
2002    Input Parameters:
2003 +  comm - MPI communicator
2004 .  bs   - size of blockk
2005 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2006            This value should be the same as the local size used in creating the
2007            y vector for the matrix-vector product y = Ax.
2008 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2009            This value should be the same as the local size used in creating the
2010            x vector for the matrix-vector product y = Ax.
2011 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2012 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2013 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2014            submatrix  (same for all local rows)
2015 .  d_nnz - array containing the number of block nonzeros in the various block rows
2016            in the upper triangular portion of the in diagonal portion of the local
2017            (possibly different for each block block row) or PETSC_NULL.
2018            If you plan to factor the matrix you must leave room for the diagonal entry and
2019            set its value even if it is zero.
2020 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2021            submatrix (same for all local rows).
2022 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2023            off-diagonal portion of the local submatrix (possibly different for
2024            each block row) or PETSC_NULL.
2025 
2026    Output Parameter:
2027 .  A - the matrix
2028 
2029    Options Database Keys:
2030 .   -mat_no_unroll - uses code that does not unroll the loops in the
2031                      block calculations (much slower)
2032 .   -mat_block_size - size of the blocks to use
2033 .   -mat_mpi - use the parallel matrix data structures even on one processor
2034                (defaults to using SeqBAIJ format on one processor)
2035 
2036    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2037    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2038    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2039 
2040    Notes:
2041    The number of rows and columns must be divisible by blocksize.
2042    This matrix type does not support complex Hermitian operation.
2043 
2044    The user MUST specify either the local or global matrix dimensions
2045    (possibly both).
2046 
2047    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2048    than it must be used on all processors that share the object for that argument.
2049 
2050    If the *_nnz parameter is given then the *_nz parameter is ignored
2051 
2052    Storage Information:
2053    For a square global matrix we define each processor's diagonal portion
2054    to be its local rows and the corresponding columns (a square submatrix);
2055    each processor's off-diagonal portion encompasses the remainder of the
2056    local matrix (a rectangular submatrix).
2057 
2058    The user can specify preallocated storage for the diagonal part of
2059    the local submatrix with either d_nz or d_nnz (not both).  Set
2060    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2061    memory allocation.  Likewise, specify preallocated storage for the
2062    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2063 
2064    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2065    the figure below we depict these three local rows and all columns (0-11).
2066 
2067 .vb
2068            0 1 2 3 4 5 6 7 8 9 10 11
2069           -------------------
2070    row 3  |  . . . d d d o o o o o o
2071    row 4  |  . . . d d d o o o o o o
2072    row 5  |  . . . d d d o o o o o o
2073           -------------------
2074 .ve
2075 
2076    Thus, any entries in the d locations are stored in the d (diagonal)
2077    submatrix, and any entries in the o locations are stored in the
2078    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2079    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2080 
2081    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2082    plus the diagonal part of the d matrix,
2083    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2084    In general, for PDE problems in which most nonzeros are near the diagonal,
2085    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2086    or you will get TERRIBLE performance; see the users' manual chapter on
2087    matrices.
2088 
2089    Level: intermediate
2090 
2091 .keywords: matrix, block, aij, compressed row, sparse, parallel
2092 
2093 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2094 @*/
2095 
2096 PetscErrorCode  MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2097 {
2098   PetscErrorCode ierr;
2099   PetscMPIInt    size;
2100 
2101   PetscFunctionBegin;
2102   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2103   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2105   if (size > 1) {
2106     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2107     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2108   } else {
2109     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2110     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2111   }
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2118 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2119 {
2120   Mat            mat;
2121   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2122   PetscErrorCode ierr;
2123   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2124   PetscScalar    *array;
2125 
2126   PetscFunctionBegin;
2127   *newmat       = 0;
2128   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2129   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2130   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2131   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2132   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2133   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2134 
2135   mat->factortype   = matin->factortype;
2136   mat->preallocated = PETSC_TRUE;
2137   mat->assembled    = PETSC_TRUE;
2138   mat->insertmode   = NOT_SET_VALUES;
2139 
2140   a = (Mat_MPISBAIJ*)mat->data;
2141   a->bs2   = oldmat->bs2;
2142   a->mbs   = oldmat->mbs;
2143   a->nbs   = oldmat->nbs;
2144   a->Mbs   = oldmat->Mbs;
2145   a->Nbs   = oldmat->Nbs;
2146 
2147 
2148   a->size         = oldmat->size;
2149   a->rank         = oldmat->rank;
2150   a->donotstash   = oldmat->donotstash;
2151   a->roworiented  = oldmat->roworiented;
2152   a->rowindices   = 0;
2153   a->rowvalues    = 0;
2154   a->getrowactive = PETSC_FALSE;
2155   a->barray       = 0;
2156   a->rstartbs    = oldmat->rstartbs;
2157   a->rendbs      = oldmat->rendbs;
2158   a->cstartbs    = oldmat->cstartbs;
2159   a->cendbs      = oldmat->cendbs;
2160 
2161   /* hash table stuff */
2162   a->ht           = 0;
2163   a->hd           = 0;
2164   a->ht_size      = 0;
2165   a->ht_flag      = oldmat->ht_flag;
2166   a->ht_fact      = oldmat->ht_fact;
2167   a->ht_total_ct  = 0;
2168   a->ht_insert_ct = 0;
2169 
2170   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2171   if (oldmat->colmap) {
2172 #if defined (PETSC_USE_CTABLE)
2173     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2174 #else
2175     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2176     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2177     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2178 #endif
2179   } else a->colmap = 0;
2180 
2181   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2182     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2183     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2184     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2185   } else a->garray = 0;
2186 
2187   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2188   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2189   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2190   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2191   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2192 
2193   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2194   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2195   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2196   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2197 
2198   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2199   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2200   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2201   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2202   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2203   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2204   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2205   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2206   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2207   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2208   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2209   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2210   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2211 
2212   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2213   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2214   a->sMvctx = oldmat->sMvctx;
2215   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2216 
2217   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2218   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2219   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2220   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2221   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2222   *newmat = mat;
2223   PetscFunctionReturn(0);
2224 }
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatLoad_MPISBAIJ"
2228 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2229 {
2230   PetscErrorCode ierr;
2231   PetscInt       i,nz,j,rstart,rend;
2232   PetscScalar    *vals,*buf;
2233   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2234   MPI_Status     status;
2235   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2236   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2237   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2238   PetscInt       bs=1,Mbs,mbs,extra_rows;
2239   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2240   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2241   int            fd;
2242 
2243   PetscFunctionBegin;
2244   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2245     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2246   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2247 
2248   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2250   if (!rank) {
2251     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2252     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2253     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2254     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2255   }
2256 
2257   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2258 
2259   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2260   M = header[1]; N = header[2];
2261 
2262   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2263   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2264   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2265 
2266   /* If global sizes are set, check if they are consistent with that given in the file */
2267   if (sizesset) {
2268     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2269   }
2270   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2271   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2272 
2273   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2274 
2275   /*
2276      This code adds extra rows to make sure the number of rows is
2277      divisible by the blocksize
2278   */
2279   Mbs        = M/bs;
2280   extra_rows = bs - M + bs*(Mbs);
2281   if (extra_rows == bs) extra_rows = 0;
2282   else                  Mbs++;
2283   if (extra_rows &&!rank) {
2284     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2285   }
2286 
2287   /* determine ownership of all rows */
2288   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2289     mbs        = Mbs/size + ((Mbs % size) > rank);
2290     m          = mbs*bs;
2291   } else { /* User Set */
2292     m          = newmat->rmap->n;
2293     mbs        = m/bs;
2294   }
2295   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2296   mmbs       = PetscMPIIntCast(mbs);
2297   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2298   rowners[0] = 0;
2299   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2300   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2301   rstart = rowners[rank];
2302   rend   = rowners[rank+1];
2303 
2304   /* distribute row lengths to all processors */
2305   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2306   if (!rank) {
2307     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2308     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2309     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2310     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2311     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2312     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2313     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2314   } else {
2315     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2316   }
2317 
2318   if (!rank) {   /* procs[0] */
2319     /* calculate the number of nonzeros on each processor */
2320     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2321     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2322     for (i=0; i<size; i++) {
2323       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2324         procsnz[i] += rowlengths[j];
2325       }
2326     }
2327     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2328 
2329     /* determine max buffer needed and allocate it */
2330     maxnz = 0;
2331     for (i=0; i<size; i++) {
2332       maxnz = PetscMax(maxnz,procsnz[i]);
2333     }
2334     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2335 
2336     /* read in my part of the matrix column indices  */
2337     nz     = procsnz[0];
2338     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2339     mycols = ibuf;
2340     if (size == 1)  nz -= extra_rows;
2341     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2342     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2343 
2344     /* read in every ones (except the last) and ship off */
2345     for (i=1; i<size-1; i++) {
2346       nz   = procsnz[i];
2347       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2348       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2349     }
2350     /* read in the stuff for the last proc */
2351     if (size != 1) {
2352       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2353       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2354       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2355       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2356     }
2357     ierr = PetscFree(cols);CHKERRQ(ierr);
2358   } else {  /* procs[i], i>0 */
2359     /* determine buffer space needed for message */
2360     nz = 0;
2361     for (i=0; i<m; i++) {
2362       nz += locrowlens[i];
2363     }
2364     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2365     mycols = ibuf;
2366     /* receive message of column indices*/
2367     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2368     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2369     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2370   }
2371 
2372   /* loop over local rows, determining number of off diagonal entries */
2373   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2374   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2375   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2376   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2377   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2378   rowcount = 0;
2379   nzcount  = 0;
2380   for (i=0; i<mbs; i++) {
2381     dcount  = 0;
2382     odcount = 0;
2383     for (j=0; j<bs; j++) {
2384       kmax = locrowlens[rowcount];
2385       for (k=0; k<kmax; k++) {
2386         tmp = mycols[nzcount++]/bs; /* block col. index */
2387         if (!mask[tmp]) {
2388           mask[tmp] = 1;
2389           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2390           else masked1[dcount++] = tmp; /* entry in diag portion */
2391         }
2392       }
2393       rowcount++;
2394     }
2395 
2396     dlens[i]  = dcount;  /* d_nzz[i] */
2397     odlens[i] = odcount; /* o_nzz[i] */
2398 
2399     /* zero out the mask elements we set */
2400     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2401     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2402   }
2403     if (!sizesset) {
2404     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2405   }
2406   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2407   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2408 
2409   if (!rank) {
2410     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2411     /* read in my part of the matrix numerical values  */
2412     nz = procsnz[0];
2413     vals = buf;
2414     mycols = ibuf;
2415     if (size == 1)  nz -= extra_rows;
2416     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2417     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2418 
2419     /* insert into matrix */
2420     jj      = rstart*bs;
2421     for (i=0; i<m; i++) {
2422       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2423       mycols += locrowlens[i];
2424       vals   += locrowlens[i];
2425       jj++;
2426     }
2427 
2428     /* read in other processors (except the last one) and ship out */
2429     for (i=1; i<size-1; i++) {
2430       nz   = procsnz[i];
2431       vals = buf;
2432       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2433       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2434     }
2435     /* the last proc */
2436     if (size != 1){
2437       nz   = procsnz[i] - extra_rows;
2438       vals = buf;
2439       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2440       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2441       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2442     }
2443     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2444 
2445   } else {
2446     /* receive numeric values */
2447     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2448 
2449     /* receive message of values*/
2450     vals   = buf;
2451     mycols = ibuf;
2452     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2453     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2454     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2455 
2456     /* insert into matrix */
2457     jj      = rstart*bs;
2458     for (i=0; i<m; i++) {
2459       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2460       mycols += locrowlens[i];
2461       vals   += locrowlens[i];
2462       jj++;
2463     }
2464   }
2465 
2466   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2467   ierr = PetscFree(buf);CHKERRQ(ierr);
2468   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2469   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2470   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2471   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2472   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2473   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2479 /*XXXXX@
2480    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2481 
2482    Input Parameters:
2483 .  mat  - the matrix
2484 .  fact - factor
2485 
2486    Not Collective on Mat, each process can have a different hash factor
2487 
2488    Level: advanced
2489 
2490   Notes:
2491    This can also be set by the command line option: -mat_use_hash_table fact
2492 
2493 .keywords: matrix, hashtable, factor, HT
2494 
2495 .seealso: MatSetOption()
2496 @XXXXX*/
2497 
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2501 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2502 {
2503   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2504   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2505   PetscReal      atmp;
2506   PetscReal      *work,*svalues,*rvalues;
2507   PetscErrorCode ierr;
2508   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2509   PetscMPIInt    rank,size;
2510   PetscInt       *rowners_bs,dest,count,source;
2511   PetscScalar    *va;
2512   MatScalar      *ba;
2513   MPI_Status     stat;
2514 
2515   PetscFunctionBegin;
2516   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2517   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2518   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2519 
2520   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2521   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2522 
2523   bs   = A->rmap->bs;
2524   mbs  = a->mbs;
2525   Mbs  = a->Mbs;
2526   ba   = b->a;
2527   bi   = b->i;
2528   bj   = b->j;
2529 
2530   /* find ownerships */
2531   rowners_bs = A->rmap->range;
2532 
2533   /* each proc creates an array to be distributed */
2534   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2535   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2536 
2537   /* row_max for B */
2538   if (rank != size-1){
2539     for (i=0; i<mbs; i++) {
2540       ncols = bi[1] - bi[0]; bi++;
2541       brow  = bs*i;
2542       for (j=0; j<ncols; j++){
2543         bcol = bs*(*bj);
2544         for (kcol=0; kcol<bs; kcol++){
2545           col = bcol + kcol;                 /* local col index */
2546           col += rowners_bs[rank+1];      /* global col index */
2547           for (krow=0; krow<bs; krow++){
2548             atmp = PetscAbsScalar(*ba); ba++;
2549             row = brow + krow;    /* local row index */
2550             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2551             if (work[col] < atmp) work[col] = atmp;
2552           }
2553         }
2554         bj++;
2555       }
2556     }
2557 
2558     /* send values to its owners */
2559     for (dest=rank+1; dest<size; dest++){
2560       svalues = work + rowners_bs[dest];
2561       count   = rowners_bs[dest+1]-rowners_bs[dest];
2562       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2563     }
2564   }
2565 
2566   /* receive values */
2567   if (rank){
2568     rvalues = work;
2569     count   = rowners_bs[rank+1]-rowners_bs[rank];
2570     for (source=0; source<rank; source++){
2571       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2572       /* process values */
2573       for (i=0; i<count; i++){
2574         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2575       }
2576     }
2577   }
2578 
2579   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2580   ierr = PetscFree(work);CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatSOR_MPISBAIJ"
2586 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2587 {
2588   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2589   PetscErrorCode    ierr;
2590   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2591   PetscScalar       *x,*ptr,*from;
2592   Vec               bb1;
2593   const PetscScalar *b;
2594 
2595   PetscFunctionBegin;
2596   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2597   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2598 
2599   if (flag == SOR_APPLY_UPPER) {
2600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2601     PetscFunctionReturn(0);
2602   }
2603 
2604   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2605     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2606       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2607       its--;
2608     }
2609 
2610     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2611     while (its--){
2612 
2613       /* lower triangular part: slvec0b = - B^T*xx */
2614       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2615 
2616       /* copy xx into slvec0a */
2617       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2618       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2619       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2620       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2621 
2622       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2623 
2624       /* copy bb into slvec1a */
2625       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2626       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2627       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2628       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2629 
2630       /* set slvec1b = 0 */
2631       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2632 
2633       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2634       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2635       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2636       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2637 
2638       /* upper triangular part: bb1 = bb1 - B*x */
2639       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2640 
2641       /* local diagonal sweep */
2642       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2643     }
2644     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2645   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2646     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2647   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2648     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2649   } else if (flag & SOR_EISENSTAT) {
2650     Vec               xx1;
2651     PetscBool         hasop;
2652     const PetscScalar *diag;
2653     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2654     PetscInt          i,n;
2655 
2656     if (!mat->xx1) {
2657       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2658       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2659     }
2660     xx1 = mat->xx1;
2661     bb1 = mat->bb1;
2662 
2663     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2664 
2665     if (!mat->diag) {
2666       /* this is wrong for same matrix with new nonzero values */
2667       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2668       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2669     }
2670     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2671 
2672     if (hasop) {
2673       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2674       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2675     } else {
2676       /*
2677           These two lines are replaced by code that may be a bit faster for a good compiler
2678       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2679       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2680       */
2681       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2682       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2683       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2684       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2685       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2686       if (omega == 1.0) {
2687 	for (i=0; i<n; i++) {
2688 	  sl[i] = b[i] - diag[i]*x[i];
2689 	}
2690         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2691       } else {
2692 	for (i=0; i<n; i++) {
2693 	  sl[i] = b[i] + scale*diag[i]*x[i];
2694 	}
2695         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2696       }
2697       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2698       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2699       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2700       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2701     }
2702 
2703     /* multiply off-diagonal portion of matrix */
2704     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2705     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2706     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2707     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2708     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2709     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2710     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2711     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2712     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2713     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2714 
2715     /* local sweep */
2716     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2717     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2718   } else {
2719     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2720   }
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2726 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2727 {
2728   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2729   PetscErrorCode ierr;
2730   Vec            lvec1,bb1;
2731 
2732   PetscFunctionBegin;
2733   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2734   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2735 
2736   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2737     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2738       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2739       its--;
2740     }
2741 
2742     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2743     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2744     while (its--){
2745       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2746 
2747       /* lower diagonal part: bb1 = bb - B^T*xx */
2748       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2749       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2750 
2751       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2752       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2753       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2754 
2755       /* upper diagonal part: bb1 = bb1 - B*x */
2756       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2757       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2758 
2759       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2760 
2761       /* diagonal sweep */
2762       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2763     }
2764     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2765     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2766   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2767   PetscFunctionReturn(0);
2768 }
2769 
2770 #undef __FUNCT__
2771 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2772 /*@
2773      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2774          CSR format the local rows.
2775 
2776    Collective on MPI_Comm
2777 
2778    Input Parameters:
2779 +  comm - MPI communicator
2780 .  bs - the block size, only a block size of 1 is supported
2781 .  m - number of local rows (Cannot be PETSC_DECIDE)
2782 .  n - This value should be the same as the local size used in creating the
2783        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2784        calculated if N is given) For square matrices n is almost always m.
2785 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2786 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2787 .   i - row indices
2788 .   j - column indices
2789 -   a - matrix values
2790 
2791    Output Parameter:
2792 .   mat - the matrix
2793 
2794    Level: intermediate
2795 
2796    Notes:
2797        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2798      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2799      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2800 
2801        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2802 
2803 .keywords: matrix, aij, compressed row, sparse, parallel
2804 
2805 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2806           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2807 @*/
2808 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2809 {
2810   PetscErrorCode ierr;
2811 
2812 
2813  PetscFunctionBegin;
2814   if (i[0]) {
2815     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2816   }
2817   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2818   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2819   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2820   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2821   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 
2826 #undef __FUNCT__
2827 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2828 /*@C
2829    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2830    (the default parallel PETSc format).
2831 
2832    Collective on MPI_Comm
2833 
2834    Input Parameters:
2835 +  A - the matrix
2836 .  bs - the block size
2837 .  i - the indices into j for the start of each local row (starts with zero)
2838 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2839 -  v - optional values in the matrix
2840 
2841    Level: developer
2842 
2843 .keywords: matrix, aij, compressed row, sparse, parallel
2844 
2845 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2846 @*/
2847 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2848 {
2849   PetscErrorCode ierr;
2850 
2851   PetscFunctionBegin;
2852   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2853   PetscFunctionReturn(0);
2854 }
2855 
2856 
2857