xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 8ddb5d8b44839bc6385c771bd2d6d5c8cd353003)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
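/*
   Illustrative sketch only (the Mat C and the shift sigma are hypothetical): the pair
   MatStoreValues()/MatRetrieveValues() stashes and later restores the numerical values
   of a matrix whose nonzero pattern has been frozen, for example

     ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(C);CHKERRQ(ierr);
     ierr = MatShift(C,sigma);CHKERRQ(ierr);
     ....  use the shifted matrix  ....
     ierr = MatRetrieveValues(C);CHKERRQ(ierr);
*/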
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
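/*
   The two macros above locate a block column within a row by a short binary search that
   narrows the window to at most three candidates, followed by a linear scan.  A
   standalone sketch of that search (illustrative only, not used by this file), returning
   the index of bcol if it is present and the insertion point otherwise:

     static PetscInt FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {
         if (rp[i] >= bcol) break;
       }
       return i;
     }
*/
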
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required by the macros */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format; to ignore these values run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         n_loc = 0;
236         for (j=0; j<n; j++){
237           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
238           in_loc[n_loc] = in[j];
239           if (roworiented) {
240             v_loc[n_loc] = v[i*n+j];
241           } else {
242             v_loc[n_loc] = v[j*m+i];
243           }
244           n_loc++;
245         }
246         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
247       }
248     }
249   }
250   PetscFunctionReturn(0);
251 }
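
/*
   Illustrative only (the indices and the value are hypothetical): since the symmetric
   format keeps just the upper block triangle, callers normally pass entries whose block
   row is <= the block column; lower-triangular input is either ignored or flagged as an
   error, depending on the MAT_IGNORE_LOWER_TRIANGULAR option handled above.

     PetscInt    i = 5,j = 7;
     PetscScalar v = 2.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
*/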
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
256 {
257   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
258   const MatScalar *value;
259   MatScalar       *barray=baij->barray;
260   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
261   PetscErrorCode  ierr;
262   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
263   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
264   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
265 
266   PetscFunctionBegin;
267   if (!barray) {
268     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
269     baij->barray = barray;
270   }
271 
272   if (roworiented) {
273     stepval = (n-1)*bs;
274   } else {
275     stepval = (m-1)*bs;
276   }
277   for (i=0; i<m; i++) {
278     if (im[i] < 0) continue;
279 #if defined(PETSC_USE_DEBUG)
280     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
281 #endif
282     if (im[i] >= rstart && im[i] < rend) {
283       row = im[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (im[i] > in[j]) {
286           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
287           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format; to ignore these values run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
288         }
289         /* If NumCol = 1 then a copy is not required */
290         if ((roworiented) && (n == 1)) {
291           barray = (MatScalar*) v + i*bs2;
292         } else if((!roworiented) && (m == 1)) {
293           barray = (MatScalar*) v + j*bs2;
294         } else { /* Here a copy is required */
295           if (roworiented) {
296             value = v + i*(stepval+bs)*bs + j*bs;
297           } else {
298             value = v + j*(stepval+bs)*bs + i*bs;
299           }
300           for (ii=0; ii<bs; ii++,value+=stepval) {
301             for (jj=0; jj<bs; jj++) {
302               *barray++  = *value++;
303             }
304           }
305           barray -=bs2;
306         }
307 
308         if (in[j] >= cstart && in[j] < cend){
309           col  = in[j] - cstart;
310           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
311         }
312         else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined (PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined (PETSC_USE_CTABLE)
333             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
349       if (!baij->donotstash) {
350         if (roworiented) {
351           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
352         } else {
353           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
354         }
355       }
356     }
357   }
358   PetscFunctionReturn(0);
359 }
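
/*
   Illustrative only (the block indices and the 2x2 block are hypothetical, bs = 2): with
   MatSetValuesBlocked() the indices are block indices and v supplies a full bs x bs dense
   block; as above, only blocks with block row <= block column are kept by the symmetric
   format.

     PetscInt    bi = 0,bj = 1;
     PetscScalar vals[4] = {1.0,0.0,0.0,1.0};
     ierr = MatSetValuesBlocked(mat,1,&bi,1,&bj,vals,ADD_VALUES);CHKERRQ(ierr);
*/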
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatGetValues_MPISBAIJ"
363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
364 {
365   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
366   PetscErrorCode ierr;
367   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
368   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
369 
370   PetscFunctionBegin;
371   for (i=0; i<m; i++) {
372     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
373     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
374     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
375       row = idxm[i] - bsrstart;
376       for (j=0; j<n; j++) {
377         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
378         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
379         if (idxn[j] >= bscstart && idxn[j] < bscend){
380           col = idxn[j] - bscstart;
381           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
382         } else {
383           if (!baij->colmap) {
384             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
385           }
386 #if defined (PETSC_USE_CTABLE)
387           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
388           data --;
389 #else
390           data = baij->colmap[idxn[j]/bs]-1;
391 #endif
392           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
393           else {
394             col  = data + idxn[j]%bs;
395             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
396           }
397         }
398       }
399     } else {
400       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatNorm_MPISBAIJ"
408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
409 {
410   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscReal      sum[2],*lnorm2;
413 
414   PetscFunctionBegin;
415   if (baij->size == 1) {
416     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
417   } else {
418     if (type == NORM_FROBENIUS) {
419       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
420       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
421       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
422       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
423       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
424       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
425       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
426       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
427     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
428       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
429       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
430       PetscReal    *rsum,*rsum2,vabs;
431       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
432       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
433       MatScalar    *v;
434 
435       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
436       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
437       /* Amat */
438       v = amat->a; jj = amat->j;
439       for (brow=0; brow<mbs; brow++) {
440         grow = bs*(rstart + brow);
441         nz = amat->i[brow+1] - amat->i[brow];
442         for (bcol=0; bcol<nz; bcol++){
443           gcol = bs*(rstart + *jj); jj++;
444           for (col=0; col<bs; col++){
445             for (row=0; row<bs; row++){
446               vabs = PetscAbsScalar(*v); v++;
447               rsum[gcol+col] += vabs;
448               /* non-diagonal block */
449               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
450             }
451           }
452         }
453       }
454       /* Bmat */
455       v = bmat->a; jj = bmat->j;
456       for (brow=0; brow<mbs; brow++) {
457         grow = bs*(rstart + brow);
458         nz = bmat->i[brow+1] - bmat->i[brow];
459         for (bcol=0; bcol<nz; bcol++){
460           gcol = bs*garray[*jj]; jj++;
461           for (col=0; col<bs; col++){
462             for (row=0; row<bs; row++){
463               vabs = PetscAbsScalar(*v); v++;
464               rsum[gcol+col] += vabs;
465               rsum[grow+row] += vabs;
466             }
467           }
468         }
469       }
470       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
471       *norm = 0.0;
472       for (col=0; col<mat->cmap->N; col++) {
473         if (rsum2[col] > *norm) *norm = rsum2[col];
474       }
475       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
476     } else {
477       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
478     }
479   }
480   PetscFunctionReturn(0);
481 }
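
/*
   For the Frobenius case above, each off-diagonal coupling block is stored exactly once
   (in B, on the owning process) while the sequential symmetric kernel reports the full
   norm of the diagonal block, so (assuming that convention for the sequential kernels)

     ||mat||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 )

   which is precisely the quantity sum[0] + 2*sum[1] reduced with MPI_Allreduce() above.
*/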
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
486 {
487   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
488   PetscErrorCode ierr;
489   PetscInt       nstash,reallocs;
490   InsertMode     addv;
491 
492   PetscFunctionBegin;
493   if (baij->donotstash || mat->nooffprocentries) {
494     PetscFunctionReturn(0);
495   }
496 
497   /* make sure all processors are either in INSERTMODE or ADDMODE */
498   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
499   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
500   mat->insertmode = addv; /* in case this processor had no cache */
501 
502   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
503   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
504   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
505   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
506   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
507   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
514 {
515   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
516   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
517   PetscErrorCode ierr;
518   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
519   PetscInt       *row,*col;
520   PetscBool      other_disassembled;
521   PetscMPIInt    n;
522   PetscBool      r1,r2,r3;
523   MatScalar      *val;
524   InsertMode     addv = mat->insertmode;
525 
526   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
527   PetscFunctionBegin;
528 
529   if (!baij->donotstash &&  !mat->nooffprocentries) {
530     while (1) {
531       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
532       if (!flg) break;
533 
534       for (i=0; i<n;) {
535         /* Now identify the consecutive vals belonging to the same row */
536         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
537         if (j < n) ncols = j-i;
538         else       ncols = n-i;
539         /* Now assemble all these values with a single function call */
540         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
541         i = j;
542       }
543     }
544     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
545     /* Now process the block-stash. Since the values are stashed column-oriented,
546        set the roworiented flag to column oriented, and after MatSetValues()
547        restore the original flags */
548     r1 = baij->roworiented;
549     r2 = a->roworiented;
550     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
551     baij->roworiented = PETSC_FALSE;
552     a->roworiented    = PETSC_FALSE;
553     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
554     while (1) {
555       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
556       if (!flg) break;
557 
558       for (i=0; i<n;) {
559         /* Now identify the consecutive vals belonging to the same row */
560         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
561         if (j < n) ncols = j-i;
562         else       ncols = n-i;
563         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
564         i = j;
565       }
566     }
567     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
568     baij->roworiented = r1;
569     a->roworiented    = r2;
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled; if so we must
577      also disassemble ourselves, in order that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
593   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
594   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
595 
596   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
597   baij->rowvalues = 0;
598 
599   PetscFunctionReturn(0);
600 }
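
/*
   Illustrative only (indices and value are hypothetical): the stash logic in
   MatAssemblyBegin/End above lets user code set entries owned by other processes and
   have them delivered during assembly, e.g.

     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/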
601 
602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
603 #undef __FUNCT__
604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
606 {
607   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
608   PetscErrorCode    ierr;
609   PetscInt          bs = mat->rmap->bs;
610   PetscMPIInt       size = baij->size,rank = baij->rank;
611   PetscBool         iascii,isdraw;
612   PetscViewer       sviewer;
613   PetscViewerFormat format;
614 
615   PetscFunctionBegin;
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
618   if (iascii) {
619     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
620     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
621       MatInfo info;
622       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
623       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
624       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
633       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
634       PetscFunctionReturn(0);
635     } else if (format == PETSC_VIEWER_ASCII_INFO) {
636       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
637       PetscFunctionReturn(0);
638     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
639       PetscFunctionReturn(0);
640     }
641   }
642 
643   if (isdraw) {
644     PetscDraw  draw;
645     PetscBool  isnull;
646     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
647     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
648   }
649 
650   if (size == 1) {
651     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
652     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
653   } else {
654     /* assemble the entire matrix onto first processor. */
655     Mat          A;
656     Mat_SeqSBAIJ *Aloc;
657     Mat_SeqBAIJ  *Bloc;
658     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
659     MatScalar    *a;
660 
661     /* Should this be the same type as mat? */
662     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
663     if (!rank) {
664       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
665     } else {
666       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
667     }
668     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
669     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
670     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
671     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
672 
673     /* copy over the A part */
674     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
675     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
676     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
677 
678     for (i=0; i<mbs; i++) {
679       rvals[0] = bs*(baij->rstartbs + i);
680       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
681       for (j=ai[i]; j<ai[i+1]; j++) {
682         col = (baij->cstartbs+aj[j])*bs;
683         for (k=0; k<bs; k++) {
684           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
685           col++; a += bs;
686         }
687       }
688     }
689     /* copy over the B part */
690     Bloc = (Mat_SeqBAIJ*)baij->B->data;
691     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
692     for (i=0; i<mbs; i++) {
693 
694       rvals[0] = bs*(baij->rstartbs + i);
695       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
696       for (j=ai[i]; j<ai[i+1]; j++) {
697         col = baij->garray[aj[j]]*bs;
698         for (k=0; k<bs; k++) {
699           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
700           col++; a += bs;
701         }
702       }
703     }
704     ierr = PetscFree(rvals);CHKERRQ(ierr);
705     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     /*
708        Everyone has to call to draw the matrix since the graphics waits are
709        synchronized across all processors that share the PetscDraw object
710     */
711     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
712     if (!rank) {
713       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
714           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
715       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
716       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
717     }
718     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
719     ierr = MatDestroy(&A);CHKERRQ(ierr);
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNCT__
725 #define __FUNCT__ "MatView_MPISBAIJ"
726 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
727 {
728   PetscErrorCode ierr;
729   PetscBool      iascii,isdraw,issocket,isbinary;
730 
731   PetscFunctionBegin;
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
735   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
736   if (iascii || isdraw || issocket || isbinary) {
737     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
738   } else {
739     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatDestroy_MPISBAIJ"
746 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
747 {
748   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752 #if defined(PETSC_USE_LOG)
753   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
754 #endif
755   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
756   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
757   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
759 #if defined (PETSC_USE_CTABLE)
760   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
761 #else
762   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
763 #endif
764   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
765   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
766   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
772   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
773   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
774   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
775   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
776   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
779 #if defined(PETSC_USE_REAL_MAT_SINGLE)
780   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
781 #endif
782   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
785   ierr = PetscFree(mat->data);CHKERRQ(ierr);
786 
787   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
798 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
799 {
800   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
801   PetscErrorCode ierr;
802   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
803   PetscScalar    *x,*from;
804 
805   PetscFunctionBegin;
806   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
807   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
808 
809   /* diagonal part */
810   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
811   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
812 
813   /* subdiagonal part */
814   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
815 
816   /* copy x into the vec slvec0 */
817   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
818   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
819 
820   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
821   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
822   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
823 
824   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   /* superdiagonal part */
827   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNCT__
832 #define __FUNCT__ "MatMult_MPISBAIJ"
833 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
834 {
835   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
836   PetscErrorCode ierr;
837   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
838   PetscScalar    *x,*from;
839 
840   PetscFunctionBegin;
841   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
842   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
843 
844   /* diagonal part */
845   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
846   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
847 
848   /* subdiagonal part */
849   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
850 
851   /* copy x into the vec slvec0 */
852   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
853   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
854 
855   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
856   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
857   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
858 
859   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   /* superdiagonal part */
862   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
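
/*
   Both MatMult variants above compute, on each process,

     y = A_d x_local + B x_ghost + (pieces of B^T x_local accumulated onto their owners)

   where A_d is the symmetric diagonal block and B stores each off-diagonal coupling
   block only once; the single scatter on slvec0/slvec1 combines the gather of x_ghost
   with the reverse accumulation of B^T x_local.
*/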
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
868 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
869 {
870   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
871   PetscErrorCode ierr;
872   PetscInt       nt;
873 
874   PetscFunctionBegin;
875   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
876   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
877 
878   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
879   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
880 
881   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   /* do diagonal part */
883   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
884   /* do superdiagonal part */
885   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
887   /* do subdiagonal part */
888   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
889   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
890   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891 
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
897 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
898 {
899   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
900   PetscErrorCode ierr;
901   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
902   PetscScalar    *x,*from,zero=0.0;
903 
904   PetscFunctionBegin;
905   /*
906   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
907   PetscSynchronizedFlush(((PetscObject)A)->comm);
908   */
909   /* diagonal part */
910   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
911   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
912 
913   /* subdiagonal part */
914   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
915 
916   /* copy x into the vec slvec0 */
917   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
918   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
919   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
920   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
921 
922   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
924   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925 
926   /* superdiagonal part */
927   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
928 
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
934 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
935 {
936   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
937   PetscErrorCode ierr;
938 
939   PetscFunctionBegin;
940   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   /* do diagonal part */
942   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
943   /* do superdiagonal part */
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
946 
947   /* do subdiagonal part */
948   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
949   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
950   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951 
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956   This only works correctly for square matrices where the subblock A->A is the
957    diagonal block
958 */
959 #undef __FUNCT__
960 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
961 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
962 {
963   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
964   PetscErrorCode ierr;
965 
966   PetscFunctionBegin;
967   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
968   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatScale_MPISBAIJ"
974 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
975 {
976   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
977   PetscErrorCode ierr;
978 
979   PetscFunctionBegin;
980   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
981   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatGetRow_MPISBAIJ"
987 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
988 {
989   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
990   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
991   PetscErrorCode ierr;
992   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
993   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
994   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
995 
996   PetscFunctionBegin;
997   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
998   mat->getrowactive = PETSC_TRUE;
999 
1000   if (!mat->rowvalues && (idx || v)) {
1001     /*
1002         allocate enough space to hold information from the longest row.
1003     */
1004     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1005     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1006     PetscInt     max = 1,mbs = mat->mbs,tmp;
1007     for (i=0; i<mbs; i++) {
1008       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1009       if (max < tmp) { max = tmp; }
1010     }
1011     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1012   }
1013 
1014   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1015   lrow = row - brstart;  /* local row index */
1016 
1017   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1018   if (!v)   {pvA = 0; pvB = 0;}
1019   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1020   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1021   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1022   nztot = nzA + nzB;
1023 
1024   cmap  = mat->garray;
1025   if (v  || idx) {
1026     if (nztot) {
1027       /* Sort by increasing column numbers, assuming A and B already sorted */
1028       PetscInt imark = -1;
1029       if (v) {
1030         *v = v_p = mat->rowvalues;
1031         for (i=0; i<nzB; i++) {
1032           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1033           else break;
1034         }
1035         imark = i;
1036         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1037         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1038       }
1039       if (idx) {
1040         *idx = idx_p = mat->rowindices;
1041         if (imark > -1) {
1042           for (i=0; i<imark; i++) {
1043             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1044           }
1045         } else {
1046           for (i=0; i<nzB; i++) {
1047             if (cmap[cworkB[i]/bs] < cstart)
1048               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1049             else break;
1050           }
1051           imark = i;
1052         }
1053         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1054         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1055       }
1056     } else {
1057       if (idx) *idx = 0;
1058       if (v)   *v   = 0;
1059     }
1060   }
1061   *nz = nztot;
1062   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1063   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1069 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1070 {
1071   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1072 
1073   PetscFunctionBegin;
1074   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1075   baij->getrowactive = PETSC_FALSE;
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1081 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1082 {
1083   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1084   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1085 
1086   PetscFunctionBegin;
1087   aA->getrow_utriangular = PETSC_TRUE;
1088   PetscFunctionReturn(0);
1089 }
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1092 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1093 {
1094   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1095   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1096 
1097   PetscFunctionBegin;
1098   aA->getrow_utriangular = PETSC_FALSE;
1099   PetscFunctionReturn(0);
1100 }
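
/*
   Illustrative only: callers of MatGetRow() on a symmetric (SBAIJ) matrix are expected
   to bracket the access with the two routines above, since only the upper triangle is
   stored; the row must be owned by this process.

     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ....  read-only access to cols and vals  ....
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/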
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1104 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1105 {
1106   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1111   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1117 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1118 {
1119   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1120   PetscErrorCode ierr;
1121 
1122   PetscFunctionBegin;
1123   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1124   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1130 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1131 {
1132   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1133   PetscErrorCode ierr;
1134 
1135   PetscFunctionBegin;
1136   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1137   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1143 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1144 {
1145   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1146   Mat            A = a->A,B = a->B;
1147   PetscErrorCode ierr;
1148   PetscReal      isend[5],irecv[5];
1149 
1150   PetscFunctionBegin;
1151   info->block_size     = (PetscReal)matin->rmap->bs;
1152   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1153   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1154   isend[3] = info->memory;  isend[4] = info->mallocs;
1155   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1156   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1157   isend[3] += info->memory;  isend[4] += info->mallocs;
1158   if (flag == MAT_LOCAL) {
1159     info->nz_used      = isend[0];
1160     info->nz_allocated = isend[1];
1161     info->nz_unneeded  = isend[2];
1162     info->memory       = isend[3];
1163     info->mallocs      = isend[4];
1164   } else if (flag == MAT_GLOBAL_MAX) {
1165     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1166     info->nz_used      = irecv[0];
1167     info->nz_allocated = irecv[1];
1168     info->nz_unneeded  = irecv[2];
1169     info->memory       = irecv[3];
1170     info->mallocs      = irecv[4];
1171   } else if (flag == MAT_GLOBAL_SUM) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else {
1179     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1180   }
1181   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1182   info->fill_ratio_needed = 0;
1183   info->factor_mallocs    = 0;
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1189 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1190 {
1191   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1192   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196   switch (op) {
1197   case MAT_NEW_NONZERO_LOCATIONS:
1198   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1199   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1200   case MAT_KEEP_NONZERO_PATTERN:
1201   case MAT_NEW_NONZERO_LOCATION_ERR:
1202     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1203     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1204     break;
1205   case MAT_ROW_ORIENTED:
1206     a->roworiented = flg;
1207     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1208     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1209     break;
1210   case MAT_NEW_DIAGONALS:
1211     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1212     break;
1213   case MAT_IGNORE_OFF_PROC_ENTRIES:
1214     a->donotstash = flg;
1215     break;
1216   case MAT_USE_HASH_TABLE:
1217     a->ht_flag = flg;
1218     break;
1219   case MAT_HERMITIAN:
1220     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1221     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1222     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1223     break;
1224   case MAT_SPD:
1225     A->spd_set                         = PETSC_TRUE;
1226     A->spd                             = flg;
1227     if (flg) {
1228       A->symmetric                     = PETSC_TRUE;
1229       A->structurally_symmetric        = PETSC_TRUE;
1230       A->symmetric_set                 = PETSC_TRUE;
1231       A->structurally_symmetric_set    = PETSC_TRUE;
1232     }
1233     break;
1234   case MAT_SYMMETRIC:
1235     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1236     break;
1237   case MAT_STRUCTURALLY_SYMMETRIC:
1238     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1239     break;
1240   case MAT_SYMMETRY_ETERNAL:
1241     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1242     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1243     break;
1244   case MAT_IGNORE_LOWER_TRIANGULAR:
1245     aA->ignore_ltriangular = flg;
1246     break;
1247   case MAT_ERROR_LOWER_TRIANGULAR:
1248     aA->ignore_ltriangular = flg;
1249     break;
1250   case MAT_GETROW_UPPERTRIANGULAR:
1251     aA->getrow_utriangular = flg;
1252     break;
1253   default:
1254     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1261 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1262 {
1263   PetscErrorCode ierr;
1264   PetscFunctionBegin;
1265   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1266     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1267   }
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1273 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1274 {
1275   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1276   Mat            a=baij->A, b=baij->B;
1277   PetscErrorCode ierr;
1278   PetscInt       nv,m,n;
1279   PetscBool      flg;
1280 
1281   PetscFunctionBegin;
1282   if (ll != rr){
1283     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1284     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1285   }
1286   if (!ll) PetscFunctionReturn(0);
1287 
1288   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1289   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %D %D must be the same",m,n);
1290 
1291   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1292   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1293 
1294   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1295 
1296   /* left diagonalscale the off-diagonal part */
1297   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1298 
1299   /* scale the diagonal part */
1300   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1301 
1302   /* right diagonalscale the off-diagonal part */
1303   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1304   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
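
/*
   Illustrative only (the Vec d is hypothetical): because the routine above requires
   identical left and right scaling vectors, the supported operation is the symmetric
   scaling mat <- diag(d) * mat * diag(d):

     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/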
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1310 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1311 {
1312   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatEqual_MPISBAIJ"
1324 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1325 {
1326   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1327   Mat            a,b,c,d;
1328   PetscBool      flg;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   a = matA->A; b = matA->B;
1333   c = matB->A; d = matB->B;
1334 
1335   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1336   if (flg) {
1337     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1338   }
1339   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatCopy_MPISBAIJ"
1345 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1346 {
1347   PetscErrorCode ierr;
1348   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1349   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1350 
1351   PetscFunctionBegin;
1352   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1353   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1354     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1355     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1356     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1357   } else {
1358     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1359     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1366 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1367 {
1368   PetscErrorCode ierr;
1369 
1370   PetscFunctionBegin;
1371   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1377 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1378 {
1379   PetscErrorCode ierr;
1380   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1381   PetscBLASInt   bnz,one=1;
1382   Mat_SeqSBAIJ   *xa,*ya;
1383   Mat_SeqBAIJ    *xb,*yb;
1384 
1385   PetscFunctionBegin;
1386   if (str == SAME_NONZERO_PATTERN) {
1387     PetscScalar alpha = a;
1388     xa = (Mat_SeqSBAIJ *)xx->A->data;
1389     ya = (Mat_SeqSBAIJ *)yy->A->data;
1390     bnz = PetscBLASIntCast(xa->nz);
1391     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1392     xb = (Mat_SeqBAIJ *)xx->B->data;
1393     yb = (Mat_SeqBAIJ *)yy->B->data;
1394     bnz = PetscBLASIntCast(xb->nz);
1395     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1396   } else {
1397     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1398     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1399     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1406 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1407 {
1408   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1409   PetscInt        rbs,cbs;
1410   PetscErrorCode  ierr;
1411 
1412   PetscFunctionBegin;
1413   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1414   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1415   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1416   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1417   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d incompatible with row layout block size %d",bs,rbs);
1418   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d incompatible with column layout block size %d",bs,cbs);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1424 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1425 {
1426   PetscErrorCode ierr;
1427   PetscInt       i;
1428   PetscBool      flg;
1429 
1430   PetscFunctionBegin;
1431   for (i=0; i<n; i++) {
1432     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1433     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1434   }
1435   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 
1440 /* -------------------------------------------------------------------*/
1441 static struct _MatOps MatOps_Values = {
1442        MatSetValues_MPISBAIJ,
1443        MatGetRow_MPISBAIJ,
1444        MatRestoreRow_MPISBAIJ,
1445        MatMult_MPISBAIJ,
1446 /* 4*/ MatMultAdd_MPISBAIJ,
1447        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1448        MatMultAdd_MPISBAIJ,
1449        0,
1450        0,
1451        0,
1452 /*10*/ 0,
1453        0,
1454        0,
1455        MatSOR_MPISBAIJ,
1456        MatTranspose_MPISBAIJ,
1457 /*15*/ MatGetInfo_MPISBAIJ,
1458        MatEqual_MPISBAIJ,
1459        MatGetDiagonal_MPISBAIJ,
1460        MatDiagonalScale_MPISBAIJ,
1461        MatNorm_MPISBAIJ,
1462 /*20*/ MatAssemblyBegin_MPISBAIJ,
1463        MatAssemblyEnd_MPISBAIJ,
1464        MatSetOption_MPISBAIJ,
1465        MatZeroEntries_MPISBAIJ,
1466 /*24*/ 0,
1467        0,
1468        0,
1469        0,
1470        0,
1471 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1472        0,
1473        0,
1474        0,
1475        0,
1476 /*34*/ MatDuplicate_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*39*/ MatAXPY_MPISBAIJ,
1482        MatGetSubMatrices_MPISBAIJ,
1483        MatIncreaseOverlap_MPISBAIJ,
1484        MatGetValues_MPISBAIJ,
1485        MatCopy_MPISBAIJ,
1486 /*44*/ 0,
1487        MatScale_MPISBAIJ,
1488        0,
1489        0,
1490        0,
1491 /*49*/ MatSetBlockSize_MPISBAIJ,
1492        0,
1493        0,
1494        0,
1495        0,
1496 /*54*/ 0,
1497        0,
1498        MatSetUnfactored_MPISBAIJ,
1499        0,
1500        MatSetValuesBlocked_MPISBAIJ,
1501 /*59*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*64*/ 0,
1507        0,
1508        0,
1509        0,
1510        0,
1511 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*74*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*79*/ 0,
1522        0,
1523        0,
1524        0,
1525        MatLoad_MPISBAIJ,
1526 /*84*/ 0,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*89*/ 0,
1532        0,
1533        0,
1534        0,
1535        0,
1536 /*94*/ 0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*99*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*104*/0,
1547        MatRealPart_MPISBAIJ,
1548        MatImaginaryPart_MPISBAIJ,
1549        MatGetRowUpperTriangular_MPISBAIJ,
1550        MatRestoreRowUpperTriangular_MPISBAIJ,
1551 /*109*/0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*114*/0,
1557        0,
1558        0,
1559        0,
1560        0,
1561 /*119*/0,
1562        0,
1563        0,
1564        0
1565 };
1566 
1567 
1568 EXTERN_C_BEGIN
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1571 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1572 {
1573   PetscFunctionBegin;
1574   *a = ((Mat_MPISBAIJ *)A->data)->A;
1575   PetscFunctionReturn(0);
1576 }
1577 EXTERN_C_END
1578 
1579 EXTERN_C_BEGIN
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1582 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1583 {
1584   Mat_MPISBAIJ   *b;
1585   PetscErrorCode ierr;
1586   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1587 
1588   PetscFunctionBegin;
1589   if (bs < 0){
1590     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1591       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1592     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1593     bs   = PetscAbs(bs);
1594   }
1595   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1596   bs = newbs;
1597 
1598   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1599   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1600   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1601   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1602 
1603   B->rmap->bs = B->cmap->bs = bs;
1604   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1605   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1606 
1607   if (d_nnz) {
1608     for (i=0; i<B->rmap->n/bs; i++) {
1609       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1610     }
1611   }
1612   if (o_nnz) {
1613     for (i=0; i<B->rmap->n/bs; i++) {
1614       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1615     }
1616   }
1617 
1618   b   = (Mat_MPISBAIJ*)B->data;
1619   mbs = B->rmap->n/bs;
1620   Mbs = B->rmap->N/bs;
1621   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1622 
1623   B->rmap->bs  = bs;
1624   b->bs2 = bs*bs;
1625   b->mbs = mbs;
1626   b->nbs = mbs;
1627   b->Mbs = Mbs;
1628   b->Nbs = Mbs;
1629 
1630   for (i=0; i<=b->size; i++) {
1631     b->rangebs[i] = B->rmap->range[i]/bs;
1632   }
1633   b->rstartbs = B->rmap->rstart/bs;
1634   b->rendbs   = B->rmap->rend/bs;
1635 
1636   b->cstartbs = B->cmap->rstart/bs;
1637   b->cendbs   = B->cmap->rend/bs;
1638 
1639   if (!B->preallocated) {
1640     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1641     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1642     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1643     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1644     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1645     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1646     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1647     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1648     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1649   }
1650 
1651   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1652   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1653   B->preallocated = PETSC_TRUE;
1654   PetscFunctionReturn(0);
1655 }
1656 EXTERN_C_END
1657 
1658 EXTERN_C_BEGIN
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1661 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1662 {
1663   PetscInt       m,rstart,cstart,cend;
1664   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1665   const PetscInt *JJ=0;
1666   PetscScalar    *values=0;
1667   PetscErrorCode ierr;
1668 
1669   PetscFunctionBegin;
1670 
1671   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1672   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1673   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1674   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1675   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1676   m      = B->rmap->n/bs;
1677   rstart = B->rmap->rstart/bs;
1678   cstart = B->cmap->rstart/bs;
1679   cend   = B->cmap->rend/bs;
1680 
1681   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1682   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
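  /* For each local block row, split its column indices into the diagonal portion
     (block columns in [cstart,cend)) and the off-diagonal portion to build d_nnz[]
     and o_nnz[] for the preallocation call below; this counting pass assumes the
     block column indices within each row are sorted in increasing order. */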
1683   for (i=0; i<m; i++) {
1684     nz = ii[i+1] - ii[i];
1685     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1686     nz_max = PetscMax(nz_max,nz);
1687     JJ  = jj + ii[i];
1688     for (j=0; j<nz; j++) {
1689       if (*JJ >= cstart) break;
1690       JJ++;
1691     }
1692     d = 0;
1693     for (; j<nz; j++) {
1694       if (*JJ++ >= cend) break;
1695       d++;
1696     }
1697     d_nnz[i] = d;
1698     o_nnz[i] = nz - d;
1699   }
1700   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1701   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1702 
1703   values = (PetscScalar*)V;
1704   if (!values) {
1705     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1706     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1707   }
1708   for (i=0; i<m; i++) {
1709     PetscInt          row    = i + rstart;
1710     PetscInt          ncols  = ii[i+1] - ii[i];
1711     const PetscInt    *icols = jj + ii[i];
1712     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1713     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1714   }
1715 
1716   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1717   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1718   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1719   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 EXTERN_C_END
1723 
1724 EXTERN_C_BEGIN
1725 #if defined(PETSC_HAVE_MUMPS)
1726 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1727 #endif
1728 #if defined(PETSC_HAVE_SPOOLES)
1729 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1730 #endif
1731 #if defined(PETSC_HAVE_PASTIX)
1732 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1733 #endif
1734 EXTERN_C_END
1735 
1736 /*MC
1737    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1738    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1739    the matrix is stored.
1740 
1741   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1742   can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE).
1743 
1744    Options Database Keys:
1745 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1746 
1747   Level: beginner
1748 
1749 .seealso: MatCreateMPISBAIJ
1750 M*/
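/*
   Usage sketch (illustrative only, not part of the manual page above); comm, M, N,
   bs, d_nz and o_nz are placeholders supplied by the caller, and the final
   MatSetOption() call is only relevant for complex Hermitian problems:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
*/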
1751 
1752 EXTERN_C_BEGIN
1753 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*);
1754 EXTERN_C_END
1755 
1756 EXTERN_C_BEGIN
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatCreate_MPISBAIJ"
1759 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1760 {
1761   Mat_MPISBAIJ   *b;
1762   PetscErrorCode ierr;
1763   PetscBool      flg;
1764 
1765   PetscFunctionBegin;
1766 
1767   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1768   B->data = (void*)b;
1769   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1770 
1771   B->ops->destroy    = MatDestroy_MPISBAIJ;
1772   B->ops->view       = MatView_MPISBAIJ;
1773   B->assembled       = PETSC_FALSE;
1774 
1775   B->insertmode = NOT_SET_VALUES;
1776   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1777   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1778 
1779   /* build local table of row and column ownerships */
1780   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1781 
1782   /* build cache for off array entries formed */
1783   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1784   b->donotstash  = PETSC_FALSE;
1785   b->colmap      = PETSC_NULL;
1786   b->garray      = PETSC_NULL;
1787   b->roworiented = PETSC_TRUE;
1788 
1789   /* stuff used in block assembly */
1790   b->barray       = 0;
1791 
1792   /* stuff used for matrix vector multiply */
1793   b->lvec         = 0;
1794   b->Mvctx        = 0;
1795   b->slvec0       = 0;
1796   b->slvec0b      = 0;
1797   b->slvec1       = 0;
1798   b->slvec1a      = 0;
1799   b->slvec1b      = 0;
1800   b->sMvctx       = 0;
1801 
1802   /* stuff for MatGetRow() */
1803   b->rowindices   = 0;
1804   b->rowvalues    = 0;
1805   b->getrowactive = PETSC_FALSE;
1806 
1807   /* hash table stuff */
1808   b->ht           = 0;
1809   b->hd           = 0;
1810   b->ht_size      = 0;
1811   b->ht_flag      = PETSC_FALSE;
1812   b->ht_fact      = 0;
1813   b->ht_total_ct  = 0;
1814   b->ht_insert_ct = 0;
1815 
1816   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1817   b->ijonly       = PETSC_FALSE;
1818 
1819   b->in_loc       = 0;
1820   b->v_loc        = 0;
1821   b->n_loc        = 0;
1822   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1823     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1824     if (flg) {
1825       PetscReal fact = 1.39;
1826       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1827       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1828       if (fact <= 1.0) fact = 1.39;
1829       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1830       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1831     }
1832   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1833 
1834 #if defined(PETSC_HAVE_PASTIX)
1835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1836 					   "MatGetFactor_mpisbaij_pastix",
1837 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1838 #endif
1839 #if defined(PETSC_HAVE_MUMPS)
1840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1841                                      "MatGetFactor_sbaij_mumps",
1842                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1843 #endif
1844 #if defined(PETSC_HAVE_SPOOLES)
1845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1846                                      "MatGetFactor_mpisbaij_spooles",
1847                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1848 #endif
1849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1850                                      "MatStoreValues_MPISBAIJ",
1851                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1853                                      "MatRetrieveValues_MPISBAIJ",
1854                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1856                                      "MatGetDiagonalBlock_MPISBAIJ",
1857                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1858   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1859                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1860                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1861   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1862                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1863                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1864   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1865                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1866                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1867 
1868   B->symmetric                  = PETSC_TRUE;
1869   B->structurally_symmetric     = PETSC_TRUE;
1870   B->symmetric_set              = PETSC_TRUE;
1871   B->structurally_symmetric_set = PETSC_TRUE;
1872   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 EXTERN_C_END
1876 
1877 /*MC
1878    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1879 
1880    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1881    and MATMPISBAIJ otherwise.
1882 
1883    Options Database Keys:
1884 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1885 
1886   Level: beginner
1887 
1888 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1889 M*/
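/*
   Usage sketch for the generic type (illustrative only; comm, M, N, bs, d_nz and
   o_nz are placeholders): the preallocation call that does not match the type
   resolved from the communicator size is simply ignored.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
     ierr = MatSeqSBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/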
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1893 /*@C
1894    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1895    the user should preallocate the matrix storage by setting the parameters
1896    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1897    performance can be increased by more than a factor of 50.
1898 
1899    Collective on Mat
1900 
1901    Input Parameters:
1902 +  A - the matrix
1903 .  bs   - size of block
1904 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1905            submatrix  (same for all local rows)
1906 .  d_nnz - array containing the number of block nonzeros in the various block rows
1907            in the upper triangular and diagonal part of the diagonal portion of the local
1908            submatrix (possibly different for each block row), or PETSC_NULL.  If you plan to factor the matrix you must leave room
1909            for the diagonal entry and set its value even if it is zero.
1910 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1911            submatrix (same for all local rows).
1912 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1913            off-diagonal portion of the local submatrix that is right of the diagonal
1914            (possibly different for each block row) or PETSC_NULL.
1915 
1916 
1917    Options Database Keys:
1918 +   -mat_no_unroll - uses code that does not unroll the loops in the
1919                      block calculations (much slower)
1920 -   -mat_block_size - size of the blocks to use
1921 
1922    Notes:
1923 
1924    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1925    then it must be used on all processors that share the object for that argument.
1926 
1927    If the *_nnz parameter is given then the *_nz parameter is ignored
1928 
1929    Storage Information:
1930    For a square global matrix we define each processor's diagonal portion
1931    to be its local rows and the corresponding columns (a square submatrix);
1932    each processor's off-diagonal portion encompasses the remainder of the
1933    local matrix (a rectangular submatrix).
1934 
1935    The user can specify preallocated storage for the diagonal part of
1936    the local submatrix with either d_nz or d_nnz (not both).  Set
1937    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1938    memory allocation.  Likewise, specify preallocated storage for the
1939    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1940 
1941    You can call MatGetInfo() to get information on how effective the preallocation was;
1942    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1943    You can also run with the option -info and look for messages with the string
1944    malloc in them to see if additional memory allocation was needed.
1945 
1946    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1947    the figure below we depict these three local rows and all columns (0-11).
1948 
1949 .vb
1950            0 1 2 3 4 5 6 7 8 9 10 11
1951           -------------------
1952    row 3  |  . . . d d d o o o o o o
1953    row 4  |  . . . d d d o o o o o o
1954    row 5  |  . . . d d d o o o o o o
1955           -------------------
1956 .ve
1957 
1958    Thus, any entries in the d locations are stored in the d (diagonal)
1959    submatrix, and any entries in the o locations are stored in the
1960    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1961    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1962 
1963    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1964    plus the diagonal part of the d matrix,
1965    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1966 
1967    In general, for PDE problems in which most nonzeros are near the diagonal,
1968    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1969    or you will get TERRIBLE performance; see the users' manual chapter on
1970    matrices.
1971 
1972    Level: intermediate
1973 
1974 .keywords: matrix, block, aij, compressed row, sparse, parallel
1975 
1976 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1977 @*/
1978 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1979 {
1980   PetscErrorCode ierr;
1981 
1982   PetscFunctionBegin;
1983   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1984   PetscValidType(B,1);
1985   PetscValidLogicalCollectiveInt(B,bs,2);
1986   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1987   PetscFunctionReturn(0);
1988 }
1989 
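/*
   Illustrative sketch (not part of the manual page above): when exact per-row
   counts are known, pass d_nnz/o_nnz arrays of length (number of local block rows)
   and the scalar d_nz/o_nz arguments are ignored.  Here A and bs are assumed to
   have been created and chosen by the caller, and the counts below are for a
   hypothetical process that owns three block rows.

     PetscInt d_nnz[] = {2,2,1};   (upper-triangular block nonzeros of the diagonal portion, including the diagonal block)
     PetscInt o_nnz[] = {1,0,1};   (block nonzeros of the off-diagonal portion)
     ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/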
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatCreateMPISBAIJ"
1992 /*@C
1993    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1994    (block compressed row).  For good matrix assembly performance
1995    the user should preallocate the matrix storage by setting the parameters
1996    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1997    performance can be increased by more than a factor of 50.
1998 
1999    Collective on MPI_Comm
2000 
2001    Input Parameters:
2002 +  comm - MPI communicator
2003 .  bs   - size of block
2004 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2005            This value should be the same as the local size used in creating the
2006            y vector for the matrix-vector product y = Ax.
2007 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2008            This value should be the same as the local size used in creating the
2009            x vector for the matrix-vector product y = Ax.
2010 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2011 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2012 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2013            submatrix  (same for all local rows)
2014 .  d_nnz - array containing the number of block nonzeros in the various block rows
2015            in the upper triangular portion of the diagonal portion of the local
2016            submatrix (possibly different for each block row), or PETSC_NULL.
2017            If you plan to factor the matrix you must leave room for the diagonal entry and
2018            set its value even if it is zero.
2019 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2020            submatrix (same for all local rows).
2021 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2022            off-diagonal portion of the local submatrix (possibly different for
2023            each block row) or PETSC_NULL.
2024 
2025    Output Parameter:
2026 .  A - the matrix
2027 
2028    Options Database Keys:
2029 +   -mat_no_unroll - uses code that does not unroll the loops in the
2030                      block calculations (much slower)
2031 .   -mat_block_size - size of the blocks to use
2032 -   -mat_mpi - use the parallel matrix data structures even on one processor
2033                (defaults to using SeqSBAIJ format on one processor)
2034 
2035    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2036    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2037    [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
2038 
2039    Notes:
2040    The number of rows and columns must be divisible by blocksize.
2041    This matrix type does not support complex Hermitian operations.
2042 
2043    The user MUST specify either the local or global matrix dimensions
2044    (possibly both).
2045 
2046    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2047    then it must be used on all processors that share the object for that argument.
2048 
2049    If the *_nnz parameter is given then the *_nz parameter is ignored
2050 
2051    Storage Information:
2052    For a square global matrix we define each processor's diagonal portion
2053    to be its local rows and the corresponding columns (a square submatrix);
2054    each processor's off-diagonal portion encompasses the remainder of the
2055    local matrix (a rectangular submatrix).
2056 
2057    The user can specify preallocated storage for the diagonal part of
2058    the local submatrix with either d_nz or d_nnz (not both).  Set
2059    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2060    memory allocation.  Likewise, specify preallocated storage for the
2061    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2062 
2063    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2064    the figure below we depict these three local rows and all columns (0-11).
2065 
2066 .vb
2067            0 1 2 3 4 5 6 7 8 9 10 11
2068           -------------------
2069    row 3  |  . . . d d d o o o o o o
2070    row 4  |  . . . d d d o o o o o o
2071    row 5  |  . . . d d d o o o o o o
2072           -------------------
2073 .ve
2074 
2075    Thus, any entries in the d locations are stored in the d (diagonal)
2076    submatrix, and any entries in the o locations are stored in the
2077    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2078    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2079 
2080    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2081    plus the diagonal part of the d matrix,
2082    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2083    In general, for PDE problems in which most nonzeros are near the diagonal,
2084    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2085    or you will get TERRIBLE performance; see the users' manual chapter on
2086    matrices.
2087 
2088    Level: intermediate
2089 
2090 .keywords: matrix, block, aij, compressed row, sparse, parallel
2091 
2092 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2093 @*/
2094 
2095 PetscErrorCode  MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2096 {
2097   PetscErrorCode ierr;
2098   PetscMPIInt    size;
2099 
2100   PetscFunctionBegin;
2101   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2102   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2103   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2104   if (size > 1) {
2105     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2106     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2107   } else {
2108     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2109     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2110   }
2111   PetscFunctionReturn(0);
2112 }
2113 
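/*
   Illustrative call (values are hypothetical): create a distributed SBAIJ matrix
   with block size 2 and global size M x M (M assumed divisible by 2), letting PETSc
   choose the local sizes, preallocating 3 block nonzeros per block row in the
   diagonal portion and 1 in the off-diagonal portion.

     Mat A;
     ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
*/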
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2117 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2118 {
2119   Mat            mat;
2120   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2121   PetscErrorCode ierr;
2122   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2123   PetscScalar    *array;
2124 
2125   PetscFunctionBegin;
2126   *newmat       = 0;
2127   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2128   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2129   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2130   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2131   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2132   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2133 
2134   mat->factortype   = matin->factortype;
2135   mat->preallocated = PETSC_TRUE;
2136   mat->assembled    = PETSC_TRUE;
2137   mat->insertmode   = NOT_SET_VALUES;
2138 
2139   a = (Mat_MPISBAIJ*)mat->data;
2140   a->bs2   = oldmat->bs2;
2141   a->mbs   = oldmat->mbs;
2142   a->nbs   = oldmat->nbs;
2143   a->Mbs   = oldmat->Mbs;
2144   a->Nbs   = oldmat->Nbs;
2145 
2146 
2147   a->size         = oldmat->size;
2148   a->rank         = oldmat->rank;
2149   a->donotstash   = oldmat->donotstash;
2150   a->roworiented  = oldmat->roworiented;
2151   a->rowindices   = 0;
2152   a->rowvalues    = 0;
2153   a->getrowactive = PETSC_FALSE;
2154   a->barray       = 0;
2155   a->rstartbs    = oldmat->rstartbs;
2156   a->rendbs      = oldmat->rendbs;
2157   a->cstartbs    = oldmat->cstartbs;
2158   a->cendbs      = oldmat->cendbs;
2159 
2160   /* hash table stuff */
2161   a->ht           = 0;
2162   a->hd           = 0;
2163   a->ht_size      = 0;
2164   a->ht_flag      = oldmat->ht_flag;
2165   a->ht_fact      = oldmat->ht_fact;
2166   a->ht_total_ct  = 0;
2167   a->ht_insert_ct = 0;
2168 
2169   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2170   if (oldmat->colmap) {
2171 #if defined (PETSC_USE_CTABLE)
2172     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2173 #else
2174     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2175     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2176     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2177 #endif
2178   } else a->colmap = 0;
2179 
2180   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2181     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2182     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2183     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2184   } else a->garray = 0;
2185 
2186   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2187   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2188   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2189   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2190   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2191 
2192   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2193   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2194   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2195   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2196 
2197   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2198   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2199   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2200   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2201   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2202   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2203   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2204   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2205   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2206   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2207   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2208   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2209   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2210 
2211   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2212   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2213   a->sMvctx = oldmat->sMvctx;
2214   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2215 
2216   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2217   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2218   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2219   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2220   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2221   *newmat = mat;
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 #undef __FUNCT__
2226 #define __FUNCT__ "MatLoad_MPISBAIJ"
2227 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2228 {
2229   PetscErrorCode ierr;
2230   PetscInt       i,nz,j,rstart,rend;
2231   PetscScalar    *vals,*buf;
2232   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2233   MPI_Status     status;
2234   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2235   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2236   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2237   PetscInt       bs=1,Mbs,mbs,extra_rows;
2238   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2239   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2240   int            fd;
2241 
2242   PetscFunctionBegin;
2243   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2244     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2245   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2246 
2247   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2248   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2249   if (!rank) {
2250     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2251     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2252     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2253     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2254   }
2255 
2256   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2257 
2258   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2259   M = header[1]; N = header[2];
2260 
2261   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2262   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2263   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2264 
2265   /* If global sizes are set, check if they are consistent with that given in the file */
2266   if (sizesset) {
2267     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2268   }
2269   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2270   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2271 
2272   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2273 
2274   /*
2275      This code adds extra rows to make sure the number of rows is
2276      divisible by the blocksize
2277   */
2278   Mbs        = M/bs;
2279   extra_rows = bs - M + bs*(Mbs);
2280   if (extra_rows == bs) extra_rows = 0;
2281   else                  Mbs++;
2282   if (extra_rows && !rank) {
2283     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2284   }
2285 
2286   /* determine ownership of all rows */
2287   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2288     mbs        = Mbs/size + ((Mbs % size) > rank);
2289     m          = mbs*bs;
2290   } else { /* User Set */
2291     m          = newmat->rmap->n;
2292     mbs        = m/bs;
2293   }
2294   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2295   mmbs       = PetscMPIIntCast(mbs);
2296   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2297   rowners[0] = 0;
2298   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2299   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2300   rstart = rowners[rank];
2301   rend   = rowners[rank+1];
2302 
2303   /* distribute row lengths to all processors */
2304   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2305   if (!rank) {
2306     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2307     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2308     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2309     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2310     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2311     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2312     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2313   } else {
2314     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2315   }
2316 
2317   if (!rank) {   /* procs[0] */
2318     /* calculate the number of nonzeros on each processor */
2319     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2320     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2321     for (i=0; i<size; i++) {
2322       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2323         procsnz[i] += rowlengths[j];
2324       }
2325     }
2326     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2327 
2328     /* determine max buffer needed and allocate it */
2329     maxnz = 0;
2330     for (i=0; i<size; i++) {
2331       maxnz = PetscMax(maxnz,procsnz[i]);
2332     }
2333     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2334 
2335     /* read in my part of the matrix column indices  */
2336     nz     = procsnz[0];
2337     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2338     mycols = ibuf;
2339     if (size == 1)  nz -= extra_rows;
2340     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2341     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2342 
2343     /* read in the column indices for the other processors (except the last) and ship them off */
2344     for (i=1; i<size-1; i++) {
2345       nz   = procsnz[i];
2346       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2347       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2348     }
2349     /* read in the stuff for the last proc */
2350     if (size != 1) {
2351       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2352       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2353       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2354       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2355     }
2356     ierr = PetscFree(cols);CHKERRQ(ierr);
2357   } else {  /* procs[i], i>0 */
2358     /* determine buffer space needed for message */
2359     nz = 0;
2360     for (i=0; i<m; i++) {
2361       nz += locrowlens[i];
2362     }
2363     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2364     mycols = ibuf;
2365     /* receive message of column indices*/
2366     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2367     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2368     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2369   }
2370 
2371   /* loop over local rows, determining number of off diagonal entries */
2372   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2373   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2374   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2375   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2376   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2377   rowcount = 0;
2378   nzcount  = 0;
2379   for (i=0; i<mbs; i++) {
2380     dcount  = 0;
2381     odcount = 0;
2382     for (j=0; j<bs; j++) {
2383       kmax = locrowlens[rowcount];
2384       for (k=0; k<kmax; k++) {
2385         tmp = mycols[nzcount++]/bs; /* block col. index */
2386         if (!mask[tmp]) {
2387           mask[tmp] = 1;
2388           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2389           else masked1[dcount++] = tmp; /* entry in diag portion */
2390         }
2391       }
2392       rowcount++;
2393     }
2394 
2395     dlens[i]  = dcount;  /* d_nnz[i] */
2396     odlens[i] = odcount; /* o_nnz[i] */
2397 
2398     /* zero out the mask elements we set */
2399     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2400     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2401   }
2402   if (!sizesset) {
2403     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2404   }
2405   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2406   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2407 
2408   if (!rank) {
2409     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2410     /* read in my part of the matrix numerical values  */
2411     nz = procsnz[0];
2412     vals = buf;
2413     mycols = ibuf;
2414     if (size == 1)  nz -= extra_rows;
2415     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2416     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2417 
2418     /* insert into matrix */
2419     jj      = rstart*bs;
2420     for (i=0; i<m; i++) {
2421       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2422       mycols += locrowlens[i];
2423       vals   += locrowlens[i];
2424       jj++;
2425     }
2426 
2427     /* read in other processors (except the last one) and ship out */
2428     for (i=1; i<size-1; i++) {
2429       nz   = procsnz[i];
2430       vals = buf;
2431       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2432       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2433     }
2434     /* the last proc */
2435     if (size != 1){
2436       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2437       vals = buf;
2438       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2439       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2440       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2441     }
2442     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2443 
2444   } else {
2445     /* receive numeric values */
2446     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2447 
2448     /* receive message of values*/
2449     vals   = buf;
2450     mycols = ibuf;
2451     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2452     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2453     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2454 
2455     /* insert into matrix */
2456     jj      = rstart*bs;
2457     for (i=0; i<m; i++) {
2458       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2459       mycols += locrowlens[i];
2460       vals   += locrowlens[i];
2461       jj++;
2462     }
2463   }
2464 
2465   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2466   ierr = PetscFree(buf);CHKERRQ(ierr);
2467   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2468   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2469   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2470   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2471   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2472   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
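/*
   Illustrative use of the loader above (the file name is hypothetical): the matrix
   must have been written with a binary viewer, and the block size can be selected
   with the -matload_block_size option handled at the top of MatLoad_MPISBAIJ().

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

   Run-time option: -matload_block_size 2
*/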
2475 
2476 #undef __FUNCT__
2477 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2478 /*XXXXX@
2479    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2480 
2481    Input Parameters:
2482 .  mat  - the matrix
2483 .  fact - factor
2484 
2485    Not Collective on Mat, each process can have a different hash factor
2486 
2487    Level: advanced
2488 
2489   Notes:
2490    This can also be set by the command line option: -mat_use_hash_table fact
2491 
2492 .keywords: matrix, hashtable, factor, HT
2493 
2494 .seealso: MatSetOption()
2495 @XXXXX*/
2496 
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2500 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2501 {
2502   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2503   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2504   PetscReal      atmp;
2505   PetscReal      *work,*svalues,*rvalues;
2506   PetscErrorCode ierr;
2507   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2508   PetscMPIInt    rank,size;
2509   PetscInt       *rowners_bs,dest,count,source;
2510   PetscScalar    *va;
2511   MatScalar      *ba;
2512   MPI_Status     stat;
2513 
2514   PetscFunctionBegin;
2515   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2516   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2517   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2518 
2519   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2520   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2521 
2522   bs   = A->rmap->bs;
2523   mbs  = a->mbs;
2524   Mbs  = a->Mbs;
2525   ba   = b->a;
2526   bi   = b->i;
2527   bj   = b->j;
2528 
2529   /* find ownerships */
2530   rowners_bs = A->rmap->range;
2531 
2532   /* each proc creates an array to be distributed */
2533   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2534   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2535 
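  /* Only the upper triangle is stored, so an entry of B in local row 'row' and
     global column 'col' also contributes to the row maximum of global row 'col',
     which is owned by a process of larger rank; those contributions are
     accumulated in 'work' and sent to their owners below. */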
2536   /* row_max for B */
2537   if (rank != size-1){
2538     for (i=0; i<mbs; i++) {
2539       ncols = bi[1] - bi[0]; bi++;
2540       brow  = bs*i;
2541       for (j=0; j<ncols; j++){
2542         bcol = bs*(*bj);
2543         for (kcol=0; kcol<bs; kcol++){
2544           col = bcol + kcol;                 /* local col index */
2545           col += rowners_bs[rank+1];      /* global col index */
2546           for (krow=0; krow<bs; krow++){
2547             atmp = PetscAbsScalar(*ba); ba++;
2548             row = brow + krow;    /* local row index */
2549             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2550             if (work[col] < atmp) work[col] = atmp;
2551           }
2552         }
2553         bj++;
2554       }
2555     }
2556 
2557     /* send values to its owners */
2558     for (dest=rank+1; dest<size; dest++){
2559       svalues = work + rowners_bs[dest];
2560       count   = rowners_bs[dest+1]-rowners_bs[dest];
2561       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2562     }
2563   }
2564 
2565   /* receive values */
2566   if (rank){
2567     rvalues = work;
2568     count   = rowners_bs[rank+1]-rowners_bs[rank];
2569     for (source=0; source<rank; source++){
2570       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2571       /* process values */
2572       for (i=0; i<count; i++){
2573         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2574       }
2575     }
2576   }
2577 
2578   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2579   ierr = PetscFree(work);CHKERRQ(ierr);
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 #undef __FUNCT__
2584 #define __FUNCT__ "MatSOR_MPISBAIJ"
2585 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2586 {
2587   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2588   PetscErrorCode    ierr;
2589   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2590   PetscScalar       *x,*ptr,*from;
2591   Vec               bb1;
2592   const PetscScalar *b;
2593 
2594   PetscFunctionBegin;
2595   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2596   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2597 
2598   if (flag == SOR_APPLY_UPPER) {
2599     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2600     PetscFunctionReturn(0);
2601   }
2602 
2603   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2604     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2605       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2606       its--;
2607     }
2608 
2609     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2610     while (its--){
2611 
2612       /* lower triangular part: slvec0b = - B^T*xx */
2613       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2614 
2615       /* copy xx into slvec0a */
2616       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2617       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2618       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2619       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2620 
2621       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2622 
2623       /* copy bb into slvec1a */
2624       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2625       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2626       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2627       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2628 
2629       /* set slvec1b = 0 */
2630       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2631 
2632       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2633       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2634       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2635       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2636 
2637       /* upper triangular part: bb1 = bb1 - B*x */
2638       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2639 
2640       /* local diagonal sweep */
2641       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2642     }
2643     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2644   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2645     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2646   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2647     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2648   } else if (flag & SOR_EISENSTAT) {
2649     Vec               xx1;
2650     PetscBool         hasop;
2651     const PetscScalar *diag;
2652     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2653     PetscInt          i,n;
2654 
2655     if (!mat->xx1) {
2656       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2657       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2658     }
2659     xx1 = mat->xx1;
2660     bb1 = mat->bb1;
2661 
2662     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2663 
2664     if (!mat->diag) {
2665       /* note: the cached diagonal is computed only once here, so it becomes stale if the same matrix is later refilled with new values */
2666       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2667       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2668     }
2669     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2670 
2671     if (hasop) {
2672       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2673       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2674     } else {
2675       /*
2676           These two lines are replaced by code that may be a bit faster for a good compiler
2677       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2678       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2679       */
2680       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2681       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2682       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2683       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2684       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2685       if (omega == 1.0) {
2686         for (i=0; i<n; i++) {
2687           sl[i] = b[i] - diag[i]*x[i];
2688         }
2689         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2690       } else {
2691         for (i=0; i<n; i++) {
2692           sl[i] = b[i] + scale*diag[i]*x[i];
2693         }
2694         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2695       }
2696       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2697       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2698       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2699       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2700     }
2701 
2702     /* multiply off-diagonal portion of matrix */
2703     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2704     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2705     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2706     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2707     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2708     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2709     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2710     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2711     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2712     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2713 
2714     /* local sweep */
2715     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2716     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2717   } else {
2718     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2719   }
2720   PetscFunctionReturn(0);
2721 }
2722 
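/*
   MatSOR_MPISBAIJ_2comm below performs the same relaxation as MatSOR_MPISBAIJ but, as read
   from the code, uses two conventional scatters on mat->Mvctx (a forward scatter to gather
   the ghost values of x into mat->lvec and a reverse scatter to accumulate the -B^T x
   contributions from lvec1 into bb1) instead of the single merged scatter on mat->sMvctx.
*/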
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2725 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2726 {
2727   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2728   PetscErrorCode ierr;
2729   Vec            lvec1,bb1;
2730 
2731   PetscFunctionBegin;
2732   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2733   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2734 
2735   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2736     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2737       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2738       its--;
2739     }
2740 
2741     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2742     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2743     while (its--){
2744       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2745 
2746       /* lower diagonal part: bb1 = bb - B^T*xx */
2747       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2748       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2749 
2750       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2751       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2752       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2753 
2754       /* upper diagonal part: bb1 = bb1 - B*x */
2755       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2756       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2757 
2758       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2759 
2760       /* diagonal sweep */
2761       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2762     }
2763     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2764     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2765   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 #undef __FUNCT__
2770 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2771 /*@
2772      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2773          in standard CSR format.
2774 
2775    Collective on MPI_Comm
2776 
2777    Input Parameters:
2778 +  comm - MPI communicator
2779 .  bs - the block size, only a block size of 1 is supported
2780 .  m - number of local rows (Cannot be PETSC_DECIDE)
2781 .  n - This value should be the same as the local size used in creating the
2782        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2783        calculated if N is given). For square matrices n is almost always m.
2784 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2785 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2786 .   i - row indices
2787 .   j - column indices
2788 -   a - matrix values
2789 
2790    Output Parameter:
2791 .   mat - the matrix
2792 
2793    Level: intermediate
2794 
2795    Notes:
2796        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2797      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2798      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2799 
2800        The i and j indices are 0 based; the entries of i are offsets into the local j array (so i[0] must be 0). A small usage sketch is given in the comment following this routine.
2801 
2802 .keywords: matrix, aij, compressed row, sparse, parallel
2803 
2804 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2805           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2806 @*/
2807 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2808 {
2809   PetscErrorCode ierr;
2810 
2812   PetscFunctionBegin;
2813   if (i[0]) {
2814     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2815   }
2816   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2817   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2818   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2819   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2820   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2821   PetscFunctionReturn(0);
2822 }
2823 
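/*
   A minimal usage sketch (an illustration, not taken from the PETSc test suite): two MPI
   processes assemble the 4x4 symmetric tridiagonal matrix tridiag(-1,2,-1) with bs = 1.
   Each process supplies only the upper triangle of its two local rows in CSR form, with
   sorted global column indices; run with exactly two processes.

     #include <petscmat.h>

     int main(int argc,char **argv)
     {
       Mat            A;
       PetscMPIInt    rank;
       PetscErrorCode ierr;

       ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       if (!rank) {
         const PetscInt    i[] = {0,2,4};
         const PetscInt    j[] = {0,1,1,2};
         const PetscScalar a[] = {2.0,-1.0,2.0,-1.0};
         ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,4,4,i,j,a,&A);CHKERRQ(ierr);
       } else {
         const PetscInt    i[] = {0,2,3};
         const PetscInt    j[] = {2,3,3};
         const PetscScalar a[] = {2.0,-1.0,2.0};
         ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,4,4,i,j,a,&A);CHKERRQ(ierr);
       }
       ierr = MatDestroy(&A);CHKERRQ(ierr);
       ierr = PetscFinalize();
       return 0;
     }

   Rows 0-1 live on process 0 and rows 2-3 on process 1; row 1 of process 0 couples to the
   off-process column 2, which internally lands in the off-diagonal ("B") block.
*/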
2824 
2825 #undef __FUNCT__
2826 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2827 /*@C
2828    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2829    using a CSR description of the local rows (a usage sketch is given in the comment following this routine).
2830 
2831    Collective on MPI_Comm
2832 
2833    Input Parameters:
2834 +  A - the matrix
2835 .  bs - the block size
2836 .  i - the indices into j for the start of each local row (starts with zero)
2837 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2838 -  v - optional values in the matrix
2839 
2840    Level: developer
2841 
2842 .keywords: matrix, aij, compressed row, sparse, parallel
2843 
2844 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2845 @*/
2846 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2847 {
2848   PetscErrorCode ierr;
2849 
2850   PetscFunctionBegin;
2851   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2852   PetscFunctionReturn(0);
2853 }
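
/*
   A brief sketch of calling the preallocation routine directly (an illustration; m, n, i, j,
   and a are as in the sketch after MatCreateMPISBAIJWithArrays() above).  The matrix must
   already have its type and sizes set, which is exactly what MatCreateMPISBAIJWithArrays()
   does before delegating to this routine:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(A,1,i,j,a);CHKERRQ(ierr);
*/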
2854 
2855 
2856