xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 6214f4126138aa4cc9867da86c269cb48c3e22ce)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 #if defined(PETSC_HAVE_ELEMENTAL)
23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
24 #endif
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
28 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
29 {
30   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
31   PetscErrorCode ierr;
32 
33   PetscFunctionBegin;
34   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
35   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
36   PetscFunctionReturn(0);
37 }
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
53   { \
54  \
55     brow = row/bs;  \
56     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
57     rmax = aimax[brow]; nrow = ailen[brow]; \
58     bcol = col/bs; \
59     ridx = row % bs; cidx = col % bs; \
60     low  = 0; high = nrow; \
61     while (high-low > 3) { \
62       t = (low+high)/2; \
63       if (rp[t] > bcol) high = t; \
64       else              low  = t; \
65     } \
66     for (_i=low; _i<high; _i++) { \
67       if (rp[_i] > bcol) break; \
68       if (rp[_i] == bcol) { \
69         bap = ap + bs2*_i + bs*cidx + ridx; \
70         if (addv == ADD_VALUES) *bap += value;  \
71         else                    *bap  = value;  \
72         goto a_noinsert; \
73       } \
74     } \
75     if (a->nonew == 1) goto a_noinsert; \
76     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
77     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
78     N = nrow++ - 1;  \
79     /* shift up all the later entries in this row */ \
80     for (ii=N; ii>=_i; ii--) { \
81       rp[ii+1] = rp[ii]; \
82       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
83     } \
84     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
85     rp[_i]                      = bcol;  \
86     ap[bs2*_i + bs*cidx + ridx] = value;  \
87     A->nonzerostate++;\
88 a_noinsert:; \
89     ailen[brow] = nrow; \
90   }
91 
92 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
93   { \
94     brow = row/bs;  \
95     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
96     rmax = bimax[brow]; nrow = bilen[brow]; \
97     bcol = col/bs; \
98     ridx = row % bs; cidx = col % bs; \
99     low  = 0; high = nrow; \
100     while (high-low > 3) { \
101       t = (low+high)/2; \
102       if (rp[t] > bcol) high = t; \
103       else              low  = t; \
104     } \
105     for (_i=low; _i<high; _i++) { \
106       if (rp[_i] > bcol) break; \
107       if (rp[_i] == bcol) { \
108         bap = ap + bs2*_i + bs*cidx + ridx; \
109         if (addv == ADD_VALUES) *bap += value;  \
110         else                    *bap  = value;  \
111         goto b_noinsert; \
112       } \
113     } \
114     if (b->nonew == 1) goto b_noinsert; \
115     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
116     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
117     N = nrow++ - 1;  \
118     /* shift up all the later entries in this row */ \
119     for (ii=N; ii>=_i; ii--) { \
120       rp[ii+1] = rp[ii]; \
121       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122     } \
123     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
124     rp[_i]                      = bcol;  \
125     ap[bs2*_i + bs*cidx + ridx] = value;  \
126     B->nonzerostate++;\
127 b_noinsert:; \
128     bilen[brow] = nrow; \
129   }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat          A     = baij->A;
149   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar    *aa   =a->a;
152 
153   Mat         B     = baij->B;
154   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar   *ba   =b->a;
157 
158   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar *ap,*bap;
161 
162   /* for stash */
163   PetscInt  n_loc, *in_loc = NULL;
164   MatScalar *v_loc = NULL;
165 
166   PetscFunctionBegin;
167   if (!baij->donotstash) {
168     if (n > baij->n_loc) {
169       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
170       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
171       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
173 
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
192         }
193         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
194           col  = in[j] - cstart_orig;         /* local col index */
195           brow = row/bs; bcol = col/bs;
196           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
197           if (roworiented) value = v[i*n+j];
198           else             value = v[i+j*m];
199           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
200           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
201         } else if (in[j] < 0) continue;
202 #if defined(PETSC_USE_DEBUG)
203         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
204 #endif
205         else {  /* off-diag entry (B) */
206           if (mat->was_assembled) {
207             if (!baij->colmap) {
208               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
209             }
210 #if defined(PETSC_USE_CTABLE)
211             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
212             col  = col - 1;
213 #else
214             col = baij->colmap[in[j]/bs] - 1;
215 #endif
216             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
217               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
218               col  =  in[j];
219               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
220               B    = baij->B;
221               b    = (Mat_SeqBAIJ*)(B)->data;
222               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
223               ba   = b->a;
224             } else col += in[j]%bs;
225           } else col = in[j];
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc          = 0;
237         for (j=0; j<n; j++) {
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray     =baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if (!barray) {
269     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If NumCol = 1 then a copy is not required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if ((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++ = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend) {
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         } else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined(PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined(PETSC_USE_CTABLE)
333             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col  = in[j];
341             }
342           } else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
348       if (!baij->donotstash) {
349         if (roworiented) {
350           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         } else {
352           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         }
354       }
355     }
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 #undef __FUNCT__
361 #define __FUNCT__ "MatGetValues_MPISBAIJ"
362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
363 {
364   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
365   PetscErrorCode ierr;
366   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
367   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
368 
369   PetscFunctionBegin;
370   for (i=0; i<m; i++) {
371     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
372     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
373     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
374       row = idxm[i] - bsrstart;
375       for (j=0; j<n; j++) {
376         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
377         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
378         if (idxn[j] >= bscstart && idxn[j] < bscend) {
379           col  = idxn[j] - bscstart;
380           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
381         } else {
382           if (!baij->colmap) {
383             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
384           }
385 #if defined(PETSC_USE_CTABLE)
386           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
387           data--;
388 #else
389           data = baij->colmap[idxn[j]/bs]-1;
390 #endif
391           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
392           else {
393             col  = data + idxn[j]%bs;
394             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
395           }
396         }
397       }
398     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
399   }
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNCT__
404 #define __FUNCT__ "MatNorm_MPISBAIJ"
405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
406 {
407   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscReal      sum[2],*lnorm2;
410 
411   PetscFunctionBegin;
412   if (baij->size == 1) {
413     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
414   } else {
415     if (type == NORM_FROBENIUS) {
416       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
417       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
418       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
419       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
420       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
421       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
422       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
423       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
424     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
425       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
426       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
427       PetscReal    *rsum,*rsum2,vabs;
428       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
429       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
430       MatScalar    *v;
431 
432       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
433       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
434       /* Amat */
435       v = amat->a; jj = amat->j;
436       for (brow=0; brow<mbs; brow++) {
437         grow = bs*(rstart + brow);
438         nz   = amat->i[brow+1] - amat->i[brow];
439         for (bcol=0; bcol<nz; bcol++) {
440           gcol = bs*(rstart + *jj); jj++;
441           for (col=0; col<bs; col++) {
442             for (row=0; row<bs; row++) {
443               vabs            = PetscAbsScalar(*v); v++;
444               rsum[gcol+col] += vabs;
445               /* non-diagonal block */
446               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       /* Bmat */
452       v = bmat->a; jj = bmat->j;
453       for (brow=0; brow<mbs; brow++) {
454         grow = bs*(rstart + brow);
455         nz = bmat->i[brow+1] - bmat->i[brow];
456         for (bcol=0; bcol<nz; bcol++) {
457           gcol = bs*garray[*jj]; jj++;
458           for (col=0; col<bs; col++) {
459             for (row=0; row<bs; row++) {
460               vabs            = PetscAbsScalar(*v); v++;
461               rsum[gcol+col] += vabs;
462               rsum[grow+row] += vabs;
463             }
464           }
465         }
466       }
467       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
468       *norm = 0.0;
469       for (col=0; col<mat->cmap->N; col++) {
470         if (rsum2[col] > *norm) *norm = rsum2[col];
471       }
472       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
473     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
474   }
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
483   PetscErrorCode ierr;
484   PetscInt       nstash,reallocs;
485   InsertMode     addv;
486 
487   PetscFunctionBegin;
488   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
489 
490   /* make sure all processors are either in INSERTMODE or ADDMODE */
491   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
492   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
493   mat->insertmode = addv; /* in case this processor had no cache */
494 
495   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
496   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
497   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
498   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
499   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
500   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
507 {
508   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
509   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
510   PetscErrorCode ierr;
511   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
512   PetscInt       *row,*col;
513   PetscBool      other_disassembled;
514   PetscMPIInt    n;
515   PetscBool      r1,r2,r3;
516   MatScalar      *val;
517   InsertMode     addv = mat->insertmode;
518 
519   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
520   PetscFunctionBegin;
521   if (!baij->donotstash &&  !mat->nooffprocentries) {
522     while (1) {
523       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
524       if (!flg) break;
525 
526       for (i=0; i<n;) {
527         /* Now identify the consecutive vals belonging to the same row */
528         for (j=i,rstart=row[j]; j<n; j++) {
529           if (row[j] != rstart) break;
530         }
531         if (j < n) ncols = j-i;
532         else       ncols = n-i;
533         /* Now assemble all these values with a single function call */
534         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
535         i    = j;
536       }
537     }
538     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
539     /* Now process the block-stash. Since the values are stashed column-oriented,
540        set the roworiented flag to column oriented, and after MatSetValues()
541        restore the original flags */
542     r1 = baij->roworiented;
543     r2 = a->roworiented;
544     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
545 
546     baij->roworiented = PETSC_FALSE;
547     a->roworiented    = PETSC_FALSE;
548 
549     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
550     while (1) {
551       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
552       if (!flg) break;
553 
554       for (i=0; i<n;) {
555         /* Now identify the consecutive vals belonging to the same row */
556         for (j=i,rstart=row[j]; j<n; j++) {
557           if (row[j] != rstart) break;
558         }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i    = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566 
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569 
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled, if so we must
577      also disassemble ourselfs, in order that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
596 
597   baij->rowvalues = 0;
598 
599   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
600   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
601     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
602     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
609 #include <petscdraw.h>
610 #undef __FUNCT__
611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
613 {
614   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
615   PetscErrorCode    ierr;
616   PetscInt          bs   = mat->rmap->bs;
617   PetscMPIInt       rank = baij->rank;
618   PetscBool         iascii,isdraw;
619   PetscViewer       sviewer;
620   PetscViewerFormat format;
621 
622   PetscFunctionBegin;
623   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
624   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
625   if (iascii) {
626     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
627     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
628       MatInfo info;
629       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
630       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
633       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
634       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
635       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
636       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
637       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
638       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
640       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_INFO) {
643       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
644       PetscFunctionReturn(0);
645     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
646       PetscFunctionReturn(0);
647     }
648   }
649 
650   if (isdraw) {
651     PetscDraw draw;
652     PetscBool isnull;
653     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
654     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
655   }
656 
657   {
658     /* assemble the entire matrix onto first processor. */
659     Mat          A;
660     Mat_SeqSBAIJ *Aloc;
661     Mat_SeqBAIJ  *Bloc;
662     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
663     MatScalar    *a;
664 
665     /* Should this be the same type as mat? */
666     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
667     if (!rank) {
668       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
669     } else {
670       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
671     }
672     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
673     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
674     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
675     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
676 
677     /* copy over the A part */
678     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
679     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
680     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
681 
682     for (i=0; i<mbs; i++) {
683       rvals[0] = bs*(baij->rstartbs + i);
684       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
685       for (j=ai[i]; j<ai[i+1]; j++) {
686         col = (baij->cstartbs+aj[j])*bs;
687         for (k=0; k<bs; k++) {
688           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
689           col++;
690           a += bs;
691         }
692       }
693     }
694     /* copy over the B part */
695     Bloc = (Mat_SeqBAIJ*)baij->B->data;
696     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
697     for (i=0; i<mbs; i++) {
698 
699       rvals[0] = bs*(baij->rstartbs + i);
700       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
701       for (j=ai[i]; j<ai[i+1]; j++) {
702         col = baij->garray[aj[j]]*bs;
703         for (k=0; k<bs; k++) {
704           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
705           col++;
706           a += bs;
707         }
708       }
709     }
710     ierr = PetscFree(rvals);CHKERRQ(ierr);
711     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
712     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
713     /*
714        Everyone has to call to draw the matrix since the graphics waits are
715        synchronized across all processors that share the PetscDraw object
716     */
717     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
718     if (!rank) {
719         ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
720     }
721     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
722     ierr = MatDestroy(&A);CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatView_MPISBAIJ"
729 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
730 {
731   PetscErrorCode ierr;
732   PetscBool      iascii,isdraw,issocket,isbinary;
733 
734   PetscFunctionBegin;
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
739   if (iascii || isdraw || issocket || isbinary) {
740     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatDestroy_MPISBAIJ"
747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748 {
749   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755 #endif
756   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
757   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
760 #if defined(PETSC_USE_CTABLE)
761   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
762 #else
763   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
764 #endif
765   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
773   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
776   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
780 #if defined(PETSC_USE_REAL_MAT_SINGLE)
781   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
782 #endif
783   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
786   ierr = PetscFree(mat->data);CHKERRQ(ierr);
787 
788   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
794 #if defined(PETSC_HAVE_ELEMENTAL)
795   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
796 #endif
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
802 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
803 {
804   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
805   PetscErrorCode ierr;
806   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
807   PetscScalar    *x,*from;
808 
809   PetscFunctionBegin;
810   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
811   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
812 
813   /* diagonal part */
814   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
815   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
816 
817   /* subdiagonal part */
818   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
819 
820   /* copy x into the vec slvec0 */
821   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
822   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
823 
824   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
825   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
826   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
827 
828   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830   /* supperdiagonal part */
831   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
832   PetscFunctionReturn(0);
833 }
834 
835 #undef __FUNCT__
836 #define __FUNCT__ "MatMult_MPISBAIJ"
837 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
838 {
839   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
840   PetscErrorCode ierr;
841   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
842   PetscScalar    *x,*from;
843 
844   PetscFunctionBegin;
845   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
846   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
847 
848   /* diagonal part */
849   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
850   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
851 
852   /* subdiagonal part */
853   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
854 
855   /* copy x into the vec slvec0 */
856   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
857   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
858 
859   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
860   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
861   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
862 
863   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865   /* supperdiagonal part */
866   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
872 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
873 {
874   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
875   PetscErrorCode ierr;
876   PetscInt       nt;
877 
878   PetscFunctionBegin;
879   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
880   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
881 
882   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
883   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
884 
885   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   /* do diagonal part */
887   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
888   /* do supperdiagonal part */
889   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
891   /* do subdiagonal part */
892   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
893   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
894   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
895   PetscFunctionReturn(0);
896 }
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
900 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
901 {
902   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
903   PetscErrorCode ierr;
904   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
905   PetscScalar    *x,*from,zero=0.0;
906 
907   PetscFunctionBegin;
908   /*
909   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
910   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
911   */
912   /* diagonal part */
913   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
914   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
915 
916   /* subdiagonal part */
917   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
918 
919   /* copy x into the vec slvec0 */
920   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
921   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
922   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
923   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
924 
925   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
927   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928 
929   /* supperdiagonal part */
930   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
936 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
937 {
938   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   /* do diagonal part */
944   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
945   /* do supperdiagonal part */
946   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
948 
949   /* do subdiagonal part */
950   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
951   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock A->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) max = tmp;
1011     }
1012     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
1025   cmap = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1049             else break;
1050           }
1051           imark = i;
1052         }
1053         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1054         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1055       }
1056     } else {
1057       if (idx) *idx = 0;
1058       if (v)   *v   = 0;
1059     }
1060   }
1061   *nz  = nztot;
1062   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1063   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1069 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1070 {
1071   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1072 
1073   PetscFunctionBegin;
1074   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1075   baij->getrowactive = PETSC_FALSE;
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1081 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1082 {
1083   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1084   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1085 
1086   PetscFunctionBegin;
1087   aA->getrow_utriangular = PETSC_TRUE;
1088   PetscFunctionReturn(0);
1089 }
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1092 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1093 {
1094   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1095   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1096 
1097   PetscFunctionBegin;
1098   aA->getrow_utriangular = PETSC_FALSE;
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1104 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1105 {
1106   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1111   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1117 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1118 {
1119   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1120   PetscErrorCode ierr;
1121 
1122   PetscFunctionBegin;
1123   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1124   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1130 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1131 {
1132   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1133   PetscErrorCode ierr;
1134 
1135   PetscFunctionBegin;
1136   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1137   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1143 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1144 {
1145   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1146   Mat            A  = a->A,B = a->B;
1147   PetscErrorCode ierr;
1148   PetscReal      isend[5],irecv[5];
1149 
1150   PetscFunctionBegin;
1151   info->block_size = (PetscReal)matin->rmap->bs;
1152 
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154 
1155   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1156   isend[3] = info->memory;  isend[4] = info->mallocs;
1157 
1158   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1159 
1160   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1161   isend[3] += info->memory;  isend[4] += info->mallocs;
1162   if (flag == MAT_LOCAL) {
1163     info->nz_used      = isend[0];
1164     info->nz_allocated = isend[1];
1165     info->nz_unneeded  = isend[2];
1166     info->memory       = isend[3];
1167     info->mallocs      = isend[4];
1168   } else if (flag == MAT_GLOBAL_MAX) {
1169     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1170 
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else if (flag == MAT_GLOBAL_SUM) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1178 
1179     info->nz_used      = irecv[0];
1180     info->nz_allocated = irecv[1];
1181     info->nz_unneeded  = irecv[2];
1182     info->memory       = irecv[3];
1183     info->mallocs      = irecv[4];
1184   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1185   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1186   info->fill_ratio_needed = 0;
1187   info->factor_mallocs    = 0;
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1193 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1194 {
1195   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1196   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   switch (op) {
1201   case MAT_NEW_NONZERO_LOCATIONS:
1202   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1203   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1204   case MAT_KEEP_NONZERO_PATTERN:
1205   case MAT_NEW_NONZERO_LOCATION_ERR:
1206     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1207     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1208     break;
1209   case MAT_ROW_ORIENTED:
1210     a->roworiented = flg;
1211 
1212     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1213     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1214     break;
1215   case MAT_NEW_DIAGONALS:
1216     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1217     break;
1218   case MAT_IGNORE_OFF_PROC_ENTRIES:
1219     a->donotstash = flg;
1220     break;
1221   case MAT_USE_HASH_TABLE:
1222     a->ht_flag = flg;
1223     break;
1224   case MAT_HERMITIAN:
1225     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1226     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1227 
1228     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1229     break;
1230   case MAT_SPD:
1231     A->spd_set = PETSC_TRUE;
1232     A->spd     = flg;
1233     if (flg) {
1234       A->symmetric                  = PETSC_TRUE;
1235       A->structurally_symmetric     = PETSC_TRUE;
1236       A->symmetric_set              = PETSC_TRUE;
1237       A->structurally_symmetric_set = PETSC_TRUE;
1238     }
1239     break;
1240   case MAT_SYMMETRIC:
1241     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1242     break;
1243   case MAT_STRUCTURALLY_SYMMETRIC:
1244     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1245     break;
1246   case MAT_SYMMETRY_ETERNAL:
1247     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1248     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1249     break;
1250   case MAT_IGNORE_LOWER_TRIANGULAR:
1251     aA->ignore_ltriangular = flg;
1252     break;
1253   case MAT_ERROR_LOWER_TRIANGULAR:
1254     aA->ignore_ltriangular = flg;
1255     break;
1256   case MAT_GETROW_UPPERTRIANGULAR:
1257     aA->getrow_utriangular = flg;
1258     break;
1259   default:
1260     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1267 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1268 {
1269   PetscErrorCode ierr;
1270 
1271   PetscFunctionBegin;
1272   if (MAT_INITIAL_MATRIX || *B != A) {
1273     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1280 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1281 {
1282   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1283   Mat            a     = baij->A, b=baij->B;
1284   PetscErrorCode ierr;
1285   PetscInt       nv,m,n;
1286   PetscBool      flg;
1287 
1288   PetscFunctionBegin;
1289   if (ll != rr) {
1290     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1291     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1292   }
1293   if (!ll) PetscFunctionReturn(0);
1294 
1295   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1296   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1297 
1298   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1299   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1300 
1301   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1302 
1303   /* left diagonalscale the off-diagonal part */
1304   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1305 
1306   /* scale the diagonal part */
1307   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1308 
1309   /* right diagonalscale the off-diagonal part */
1310   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1311   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1317 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1318 {
1319   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatEqual_MPISBAIJ"
1331 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1332 {
1333   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1334   Mat            a,b,c,d;
1335   PetscBool      flg;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   a = matA->A; b = matA->B;
1340   c = matB->A; d = matB->B;
1341 
1342   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1343   if (flg) {
1344     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1345   }
1346   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatCopy_MPISBAIJ"
1352 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1353 {
1354   PetscErrorCode ierr;
1355   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1356   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1357 
1358   PetscFunctionBegin;
1359   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1360   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1361     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1362     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1363     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1364   } else {
1365     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1366     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1373 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1374 {
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1384 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1385 {
1386   PetscErrorCode ierr;
1387   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1388   PetscBLASInt   bnz,one=1;
1389   Mat_SeqSBAIJ   *xa,*ya;
1390   Mat_SeqBAIJ    *xb,*yb;
1391 
1392   PetscFunctionBegin;
1393   if (str == SAME_NONZERO_PATTERN) {
1394     PetscScalar alpha = a;
1395     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1396     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1397     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1398     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1399     xb   = (Mat_SeqBAIJ*)xx->B->data;
1400     yb   = (Mat_SeqBAIJ*)yy->B->data;
1401     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1402     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1403     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1404   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1405     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1406     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1407     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1408   } else {
1409     Mat      B;
1410     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1411     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1412     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1413     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1414     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1415     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1416     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1417     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1418     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1419     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1420     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1421     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1422     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1423     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1424     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1425     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1426     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1427     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1428     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1429     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1436 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1437 {
1438   PetscErrorCode ierr;
1439   PetscInt       i;
1440   PetscBool      flg;
1441 
1442   PetscFunctionBegin;
1443   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1444   for (i=0; i<n; i++) {
1445     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1446     if (!flg) { /* *B[i] is non-symmetric, set flag */
1447       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1448     }
1449   }
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 /* -------------------------------------------------------------------*/
1454 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1455                                        MatGetRow_MPISBAIJ,
1456                                        MatRestoreRow_MPISBAIJ,
1457                                        MatMult_MPISBAIJ,
1458                                /*  4*/ MatMultAdd_MPISBAIJ,
1459                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1460                                        MatMultAdd_MPISBAIJ,
1461                                        0,
1462                                        0,
1463                                        0,
1464                                /* 10*/ 0,
1465                                        0,
1466                                        0,
1467                                        MatSOR_MPISBAIJ,
1468                                        MatTranspose_MPISBAIJ,
1469                                /* 15*/ MatGetInfo_MPISBAIJ,
1470                                        MatEqual_MPISBAIJ,
1471                                        MatGetDiagonal_MPISBAIJ,
1472                                        MatDiagonalScale_MPISBAIJ,
1473                                        MatNorm_MPISBAIJ,
1474                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1475                                        MatAssemblyEnd_MPISBAIJ,
1476                                        MatSetOption_MPISBAIJ,
1477                                        MatZeroEntries_MPISBAIJ,
1478                                /* 24*/ 0,
1479                                        0,
1480                                        0,
1481                                        0,
1482                                        0,
1483                                /* 29*/ MatSetUp_MPISBAIJ,
1484                                        0,
1485                                        0,
1486                                        0,
1487                                        0,
1488                                /* 34*/ MatDuplicate_MPISBAIJ,
1489                                        0,
1490                                        0,
1491                                        0,
1492                                        0,
1493                                /* 39*/ MatAXPY_MPISBAIJ,
1494                                        MatGetSubMatrices_MPISBAIJ,
1495                                        MatIncreaseOverlap_MPISBAIJ,
1496                                        MatGetValues_MPISBAIJ,
1497                                        MatCopy_MPISBAIJ,
1498                                /* 44*/ 0,
1499                                        MatScale_MPISBAIJ,
1500                                        0,
1501                                        0,
1502                                        0,
1503                                /* 49*/ 0,
1504                                        0,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                /* 54*/ 0,
1509                                        0,
1510                                        MatSetUnfactored_MPISBAIJ,
1511                                        0,
1512                                        MatSetValuesBlocked_MPISBAIJ,
1513                                /* 59*/ 0,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                        0,
1518                                /* 64*/ 0,
1519                                        0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1524                                        0,
1525                                        0,
1526                                        0,
1527                                        0,
1528                                /* 74*/ 0,
1529                                        0,
1530                                        0,
1531                                        0,
1532                                        0,
1533                                /* 79*/ 0,
1534                                        0,
1535                                        0,
1536                                        0,
1537                                        MatLoad_MPISBAIJ,
1538                                /* 84*/ 0,
1539                                        0,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                /* 89*/ 0,
1544                                        0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                /* 94*/ 0,
1549                                        0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                /* 99*/ 0,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                /*104*/ 0,
1559                                        MatRealPart_MPISBAIJ,
1560                                        MatImaginaryPart_MPISBAIJ,
1561                                        MatGetRowUpperTriangular_MPISBAIJ,
1562                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1563                                /*109*/ 0,
1564                                        0,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                /*114*/ 0,
1569                                        0,
1570                                        0,
1571                                        0,
1572                                        0,
1573                                /*119*/ 0,
1574                                        0,
1575                                        0,
1576                                        0,
1577                                        0,
1578                                /*124*/ 0,
1579                                        0,
1580                                        0,
1581                                        0,
1582                                        0,
1583                                /*129*/ 0,
1584                                        0,
1585                                        0,
1586                                        0,
1587                                        0,
1588                                /*134*/ 0,
1589                                        0,
1590                                        0,
1591                                        0,
1592                                        0,
1593                                /*139*/ 0,
1594                                        0,
1595                                        0,
1596                                        0,
1597                                        0,
1598                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1599 };
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1603 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1604 {
1605   PetscFunctionBegin;
1606   *a = ((Mat_MPISBAIJ*)A->data)->A;
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 #undef __FUNCT__
1611 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1612 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1613 {
1614   Mat_MPISBAIJ   *b;
1615   PetscErrorCode ierr;
1616   PetscInt       i,mbs,Mbs;
1617 
1618   PetscFunctionBegin;
1619   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1620   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1621   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1622   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1623 
1624   b   = (Mat_MPISBAIJ*)B->data;
1625   mbs = B->rmap->n/bs;
1626   Mbs = B->rmap->N/bs;
1627   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1628 
1629   B->rmap->bs = bs;
1630   b->bs2      = bs*bs;
1631   b->mbs      = mbs;
1632   b->Mbs      = Mbs;
1633   b->nbs      = B->cmap->n/bs;
1634   b->Nbs      = B->cmap->N/bs;
1635 
1636   for (i=0; i<=b->size; i++) {
1637     b->rangebs[i] = B->rmap->range[i]/bs;
1638   }
1639   b->rstartbs = B->rmap->rstart/bs;
1640   b->rendbs   = B->rmap->rend/bs;
1641 
1642   b->cstartbs = B->cmap->rstart/bs;
1643   b->cendbs   = B->cmap->rend/bs;
1644 
1645   if (!B->preallocated) {
1646     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1647     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1648     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1649     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1650     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1651     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1652     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1653     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1654     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1655   }
1656 
1657   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1658   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1659 
1660   B->preallocated = PETSC_TRUE;
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1666 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1667 {
1668   PetscInt       m,rstart,cstart,cend;
1669   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1670   const PetscInt *JJ    =0;
1671   PetscScalar    *values=0;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1676   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1677   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1678   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1679   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1680   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1681   m      = B->rmap->n/bs;
1682   rstart = B->rmap->rstart/bs;
1683   cstart = B->cmap->rstart/bs;
1684   cend   = B->cmap->rend/bs;
1685 
1686   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1687   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1688   for (i=0; i<m; i++) {
1689     nz = ii[i+1] - ii[i];
1690     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1691     nz_max = PetscMax(nz_max,nz);
1692     JJ     = jj + ii[i];
1693     for (j=0; j<nz; j++) {
1694       if (*JJ >= cstart) break;
1695       JJ++;
1696     }
1697     d = 0;
1698     for (; j<nz; j++) {
1699       if (*JJ++ >= cend) break;
1700       d++;
1701     }
1702     d_nnz[i] = d;
1703     o_nnz[i] = nz - d;
1704   }
1705   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1706   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1707 
1708   values = (PetscScalar*)V;
1709   if (!values) {
1710     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1711     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1712   }
1713   for (i=0; i<m; i++) {
1714     PetscInt          row    = i + rstart;
1715     PetscInt          ncols  = ii[i+1] - ii[i];
1716     const PetscInt    *icols = jj + ii[i];
1717     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1718     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1719   }
1720 
1721   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1722   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 /*MC
1729    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1730    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1731    the matrix is stored.
1732 
1733   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1734   can call MatSetOption(Mat, MAT_HERMITIAN);
1735 
1736    Options Database Keys:
1737 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1738 
1739   Level: beginner
1740 
1741 .seealso: MatCreateMPISBAIJ
1742 M*/
1743 
1744 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatCreate_MPISBAIJ"
1748 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1749 {
1750   Mat_MPISBAIJ   *b;
1751   PetscErrorCode ierr;
1752   PetscBool      flg = PETSC_FALSE;
1753 
1754   PetscFunctionBegin;
1755   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1756   B->data = (void*)b;
1757   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1758 
1759   B->ops->destroy = MatDestroy_MPISBAIJ;
1760   B->ops->view    = MatView_MPISBAIJ;
1761   B->assembled    = PETSC_FALSE;
1762   B->insertmode   = NOT_SET_VALUES;
1763 
1764   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1765   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1766 
1767   /* build local table of row and column ownerships */
1768   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1769 
1770   /* build cache for off array entries formed */
1771   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1772 
1773   b->donotstash  = PETSC_FALSE;
1774   b->colmap      = NULL;
1775   b->garray      = NULL;
1776   b->roworiented = PETSC_TRUE;
1777 
1778   /* stuff used in block assembly */
1779   b->barray = 0;
1780 
1781   /* stuff used for matrix vector multiply */
1782   b->lvec    = 0;
1783   b->Mvctx   = 0;
1784   b->slvec0  = 0;
1785   b->slvec0b = 0;
1786   b->slvec1  = 0;
1787   b->slvec1a = 0;
1788   b->slvec1b = 0;
1789   b->sMvctx  = 0;
1790 
1791   /* stuff for MatGetRow() */
1792   b->rowindices   = 0;
1793   b->rowvalues    = 0;
1794   b->getrowactive = PETSC_FALSE;
1795 
1796   /* hash table stuff */
1797   b->ht           = 0;
1798   b->hd           = 0;
1799   b->ht_size      = 0;
1800   b->ht_flag      = PETSC_FALSE;
1801   b->ht_fact      = 0;
1802   b->ht_total_ct  = 0;
1803   b->ht_insert_ct = 0;
1804 
1805   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1806   b->ijonly = PETSC_FALSE;
1807 
1808   b->in_loc = 0;
1809   b->v_loc  = 0;
1810   b->n_loc  = 0;
1811 
1812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1816   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1817   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1818 #if defined(PETSC_HAVE_ELEMENTAL)
1819   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
1820 #endif
1821 
1822   B->symmetric                  = PETSC_TRUE;
1823   B->structurally_symmetric     = PETSC_TRUE;
1824   B->symmetric_set              = PETSC_TRUE;
1825   B->structurally_symmetric_set = PETSC_TRUE;
1826 
1827   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1828   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1829   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1830   if (flg) {
1831     PetscReal fact = 1.39;
1832     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1833     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1834     if (fact <= 1.0) fact = 1.39;
1835     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1836     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1837   }
1838   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 /*MC
1843    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1844 
1845    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1846    and MATMPISBAIJ otherwise.
1847 
1848    Options Database Keys:
1849 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1850 
1851   Level: beginner
1852 
1853 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1854 M*/
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1858 /*@C
1859    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1860    the user should preallocate the matrix storage by setting the parameters
1861    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1862    performance can be increased by more than a factor of 50.
1863 
1864    Collective on Mat
1865 
1866    Input Parameters:
1867 +  B - the matrix
1868 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1869           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1870 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1871            submatrix  (same for all local rows)
1872 .  d_nnz - array containing the number of block nonzeros in the various block rows
1873            in the upper triangular and diagonal part of the in diagonal portion of the local
1874            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1875            for the diagonal entry and set a value even if it is zero.
1876 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1877            submatrix (same for all local rows).
1878 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1879            off-diagonal portion of the local submatrix that is right of the diagonal
1880            (possibly different for each block row) or NULL.
1881 
1882 
1883    Options Database Keys:
1884 .   -mat_no_unroll - uses code that does not unroll the loops in the
1885                      block calculations (much slower)
1886 .   -mat_block_size - size of the blocks to use
1887 
1888    Notes:
1889 
1890    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1891    than it must be used on all processors that share the object for that argument.
1892 
1893    If the *_nnz parameter is given then the *_nz parameter is ignored
1894 
1895    Storage Information:
1896    For a square global matrix we define each processor's diagonal portion
1897    to be its local rows and the corresponding columns (a square submatrix);
1898    each processor's off-diagonal portion encompasses the remainder of the
1899    local matrix (a rectangular submatrix).
1900 
1901    The user can specify preallocated storage for the diagonal part of
1902    the local submatrix with either d_nz or d_nnz (not both).  Set
1903    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1904    memory allocation.  Likewise, specify preallocated storage for the
1905    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1906 
1907    You can call MatGetInfo() to get information on how effective the preallocation was;
1908    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1909    You can also run with the option -info and look for messages with the string
1910    malloc in them to see if additional memory allocation was needed.
1911 
1912    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1913    the figure below we depict these three local rows and all columns (0-11).
1914 
1915 .vb
1916            0 1 2 3 4 5 6 7 8 9 10 11
1917           --------------------------
1918    row 3  |. . . d d d o o o o  o  o
1919    row 4  |. . . d d d o o o o  o  o
1920    row 5  |. . . d d d o o o o  o  o
1921           --------------------------
1922 .ve
1923 
1924    Thus, any entries in the d locations are stored in the d (diagonal)
1925    submatrix, and any entries in the o locations are stored in the
1926    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1927    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1928 
1929    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1930    plus the diagonal part of the d matrix,
1931    and o_nz should indicate the number of block nonzeros per row in the o matrix
1932 
1933    In general, for PDE problems in which most nonzeros are near the diagonal,
1934    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1935    or you will get TERRIBLE performance; see the users' manual chapter on
1936    matrices.
1937 
1938    Level: intermediate
1939 
1940 .keywords: matrix, block, aij, compressed row, sparse, parallel
1941 
1942 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1943 @*/
1944 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1945 {
1946   PetscErrorCode ierr;
1947 
1948   PetscFunctionBegin;
1949   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1950   PetscValidType(B,1);
1951   PetscValidLogicalCollectiveInt(B,bs,2);
1952   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatCreateSBAIJ"
1958 /*@C
1959    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1960    (block compressed row).  For good matrix assembly performance
1961    the user should preallocate the matrix storage by setting the parameters
1962    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1963    performance can be increased by more than a factor of 50.
1964 
1965    Collective on MPI_Comm
1966 
1967    Input Parameters:
1968 +  comm - MPI communicator
1969 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1970           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1971 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1972            This value should be the same as the local size used in creating the
1973            y vector for the matrix-vector product y = Ax.
1974 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1975            This value should be the same as the local size used in creating the
1976            x vector for the matrix-vector product y = Ax.
1977 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1978 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1979 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1980            submatrix  (same for all local rows)
1981 .  d_nnz - array containing the number of block nonzeros in the various block rows
1982            in the upper triangular portion of the in diagonal portion of the local
1983            (possibly different for each block block row) or NULL.
1984            If you plan to factor the matrix you must leave room for the diagonal entry and
1985            set its value even if it is zero.
1986 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1987            submatrix (same for all local rows).
1988 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1989            off-diagonal portion of the local submatrix (possibly different for
1990            each block row) or NULL.
1991 
1992    Output Parameter:
1993 .  A - the matrix
1994 
1995    Options Database Keys:
1996 .   -mat_no_unroll - uses code that does not unroll the loops in the
1997                      block calculations (much slower)
1998 .   -mat_block_size - size of the blocks to use
1999 .   -mat_mpi - use the parallel matrix data structures even on one processor
2000                (defaults to using SeqBAIJ format on one processor)
2001 
2002    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2003    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2004    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2005 
2006    Notes:
2007    The number of rows and columns must be divisible by blocksize.
2008    This matrix type does not support complex Hermitian operation.
2009 
2010    The user MUST specify either the local or global matrix dimensions
2011    (possibly both).
2012 
2013    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2014    than it must be used on all processors that share the object for that argument.
2015 
2016    If the *_nnz parameter is given then the *_nz parameter is ignored
2017 
2018    Storage Information:
2019    For a square global matrix we define each processor's diagonal portion
2020    to be its local rows and the corresponding columns (a square submatrix);
2021    each processor's off-diagonal portion encompasses the remainder of the
2022    local matrix (a rectangular submatrix).
2023 
2024    The user can specify preallocated storage for the diagonal part of
2025    the local submatrix with either d_nz or d_nnz (not both).  Set
2026    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2027    memory allocation.  Likewise, specify preallocated storage for the
2028    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2029 
2030    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2031    the figure below we depict these three local rows and all columns (0-11).
2032 
2033 .vb
2034            0 1 2 3 4 5 6 7 8 9 10 11
2035           --------------------------
2036    row 3  |. . . d d d o o o o  o  o
2037    row 4  |. . . d d d o o o o  o  o
2038    row 5  |. . . d d d o o o o  o  o
2039           --------------------------
2040 .ve
2041 
2042    Thus, any entries in the d locations are stored in the d (diagonal)
2043    submatrix, and any entries in the o locations are stored in the
2044    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2045    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2046 
2047    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2048    plus the diagonal part of the d matrix,
2049    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2050    In general, for PDE problems in which most nonzeros are near the diagonal,
2051    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2052    or you will get TERRIBLE performance; see the users' manual chapter on
2053    matrices.
2054 
2055    Level: intermediate
2056 
2057 .keywords: matrix, block, aij, compressed row, sparse, parallel
2058 
2059 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2060 @*/
2061 
2062 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2063 {
2064   PetscErrorCode ierr;
2065   PetscMPIInt    size;
2066 
2067   PetscFunctionBegin;
2068   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2069   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2070   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2071   if (size > 1) {
2072     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2073     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2074   } else {
2075     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2076     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2077   }
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2084 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2085 {
2086   Mat            mat;
2087   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2088   PetscErrorCode ierr;
2089   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2090   PetscScalar    *array;
2091 
2092   PetscFunctionBegin;
2093   *newmat = 0;
2094 
2095   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2096   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2097   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2098   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2099   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2100   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2101 
2102   mat->factortype   = matin->factortype;
2103   mat->preallocated = PETSC_TRUE;
2104   mat->assembled    = PETSC_TRUE;
2105   mat->insertmode   = NOT_SET_VALUES;
2106 
2107   a      = (Mat_MPISBAIJ*)mat->data;
2108   a->bs2 = oldmat->bs2;
2109   a->mbs = oldmat->mbs;
2110   a->nbs = oldmat->nbs;
2111   a->Mbs = oldmat->Mbs;
2112   a->Nbs = oldmat->Nbs;
2113 
2114 
2115   a->size         = oldmat->size;
2116   a->rank         = oldmat->rank;
2117   a->donotstash   = oldmat->donotstash;
2118   a->roworiented  = oldmat->roworiented;
2119   a->rowindices   = 0;
2120   a->rowvalues    = 0;
2121   a->getrowactive = PETSC_FALSE;
2122   a->barray       = 0;
2123   a->rstartbs     = oldmat->rstartbs;
2124   a->rendbs       = oldmat->rendbs;
2125   a->cstartbs     = oldmat->cstartbs;
2126   a->cendbs       = oldmat->cendbs;
2127 
2128   /* hash table stuff */
2129   a->ht           = 0;
2130   a->hd           = 0;
2131   a->ht_size      = 0;
2132   a->ht_flag      = oldmat->ht_flag;
2133   a->ht_fact      = oldmat->ht_fact;
2134   a->ht_total_ct  = 0;
2135   a->ht_insert_ct = 0;
2136 
2137   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2138   if (oldmat->colmap) {
2139 #if defined(PETSC_USE_CTABLE)
2140     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2141 #else
2142     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2143     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2144     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2145 #endif
2146   } else a->colmap = 0;
2147 
2148   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2149     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2150     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2151     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2152   } else a->garray = 0;
2153 
2154   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2155   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2157   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2158   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2159 
2160   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2161   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2162   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2163   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2164 
2165   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2166   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2167   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2168   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2169   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2170   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2171   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2172   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2173   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2174   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2175   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2177   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2178 
2179   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2180   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2181   a->sMvctx = oldmat->sMvctx;
2182   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2183 
2184   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2185   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2186   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2187   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2188   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2189   *newmat = mat;
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatLoad_MPISBAIJ"
2195 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2196 {
2197   PetscErrorCode ierr;
2198   PetscInt       i,nz,j,rstart,rend;
2199   PetscScalar    *vals,*buf;
2200   MPI_Comm       comm;
2201   MPI_Status     status;
2202   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2203   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2204   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2205   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2206   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2207   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2208   int            fd;
2209 
2210   PetscFunctionBegin;
2211   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2212   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2213   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2214   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2215   if (bs < 0) bs = 1;
2216 
2217   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2218   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2219   if (!rank) {
2220     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2221     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2222     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2223     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2224   }
2225 
2226   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2227 
2228   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2229   M    = header[1];
2230   N    = header[2];
2231 
2232   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2233   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2234   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2235 
2236   /* If global sizes are set, check if they are consistent with that given in the file */
2237   if (sizesset) {
2238     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2239   }
2240   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2241   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2242 
2243   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2244 
2245   /*
2246      This code adds extra rows to make sure the number of rows is
2247      divisible by the blocksize
2248   */
2249   Mbs        = M/bs;
2250   extra_rows = bs - M + bs*(Mbs);
2251   if (extra_rows == bs) extra_rows = 0;
2252   else                  Mbs++;
2253   if (extra_rows &&!rank) {
2254     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2255   }
2256 
2257   /* determine ownership of all rows */
2258   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2259     mbs = Mbs/size + ((Mbs % size) > rank);
2260     m   = mbs*bs;
2261   } else { /* User Set */
2262     m   = newmat->rmap->n;
2263     mbs = m/bs;
2264   }
2265   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2266   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2267   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2268   rowners[0] = 0;
2269   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2270   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2271   rstart = rowners[rank];
2272   rend   = rowners[rank+1];
2273 
2274   /* distribute row lengths to all processors */
2275   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2276   if (!rank) {
2277     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2278     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2279     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2280     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2281     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2282     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2283     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2284   } else {
2285     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2286   }
2287 
2288   if (!rank) {   /* procs[0] */
2289     /* calculate the number of nonzeros on each processor */
2290     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2291     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2292     for (i=0; i<size; i++) {
2293       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2294         procsnz[i] += rowlengths[j];
2295       }
2296     }
2297     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2298 
2299     /* determine max buffer needed and allocate it */
2300     maxnz = 0;
2301     for (i=0; i<size; i++) {
2302       maxnz = PetscMax(maxnz,procsnz[i]);
2303     }
2304     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2305 
2306     /* read in my part of the matrix column indices  */
2307     nz     = procsnz[0];
2308     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2309     mycols = ibuf;
2310     if (size == 1) nz -= extra_rows;
2311     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2312     if (size == 1) {
2313       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2314     }
2315 
2316     /* read in every ones (except the last) and ship off */
2317     for (i=1; i<size-1; i++) {
2318       nz   = procsnz[i];
2319       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2320       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2321     }
2322     /* read in the stuff for the last proc */
2323     if (size != 1) {
2324       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2325       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2326       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2327       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2328     }
2329     ierr = PetscFree(cols);CHKERRQ(ierr);
2330   } else {  /* procs[i], i>0 */
2331     /* determine buffer space needed for message */
2332     nz = 0;
2333     for (i=0; i<m; i++) nz += locrowlens[i];
2334     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2335     mycols = ibuf;
2336     /* receive message of column indices*/
2337     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2338     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2339     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2340   }
2341 
2342   /* loop over local rows, determining number of off diagonal entries */
2343   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2344   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2345   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2346   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2347   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2348   rowcount = 0;
2349   nzcount  = 0;
2350   for (i=0; i<mbs; i++) {
2351     dcount  = 0;
2352     odcount = 0;
2353     for (j=0; j<bs; j++) {
2354       kmax = locrowlens[rowcount];
2355       for (k=0; k<kmax; k++) {
2356         tmp = mycols[nzcount++]/bs; /* block col. index */
2357         if (!mask[tmp]) {
2358           mask[tmp] = 1;
2359           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2360           else masked1[dcount++] = tmp; /* entry in diag portion */
2361         }
2362       }
2363       rowcount++;
2364     }
2365 
2366     dlens[i]  = dcount;  /* d_nzz[i] */
2367     odlens[i] = odcount; /* o_nzz[i] */
2368 
2369     /* zero out the mask elements we set */
2370     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2371     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2372   }
2373   if (!sizesset) {
2374     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2375   }
2376   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2377   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2378 
2379   if (!rank) {
2380     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2381     /* read in my part of the matrix numerical values  */
2382     nz     = procsnz[0];
2383     vals   = buf;
2384     mycols = ibuf;
2385     if (size == 1) nz -= extra_rows;
2386     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2387     if (size == 1) {
2388       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2389     }
2390 
2391     /* insert into matrix */
2392     jj = rstart*bs;
2393     for (i=0; i<m; i++) {
2394       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2395       mycols += locrowlens[i];
2396       vals   += locrowlens[i];
2397       jj++;
2398     }
2399 
2400     /* read in other processors (except the last one) and ship out */
2401     for (i=1; i<size-1; i++) {
2402       nz   = procsnz[i];
2403       vals = buf;
2404       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2405       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2406     }
2407     /* the last proc */
2408     if (size != 1) {
2409       nz   = procsnz[i] - extra_rows;
2410       vals = buf;
2411       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2412       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2413       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2414     }
2415     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2416 
2417   } else {
2418     /* receive numeric values */
2419     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2420 
2421     /* receive message of values*/
2422     vals   = buf;
2423     mycols = ibuf;
2424     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2425     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2426     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2427 
2428     /* insert into matrix */
2429     jj = rstart*bs;
2430     for (i=0; i<m; i++) {
2431       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2432       mycols += locrowlens[i];
2433       vals   += locrowlens[i];
2434       jj++;
2435     }
2436   }
2437 
2438   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2439   ierr = PetscFree(buf);CHKERRQ(ierr);
2440   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2441   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2442   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2443   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2444   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2445   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2451 /*XXXXX@
2452    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2453 
2454    Input Parameters:
2455 .  mat  - the matrix
2456 .  fact - factor
2457 
2458    Not Collective on Mat, each process can have a different hash factor
2459 
2460    Level: advanced
2461 
2462   Notes:
2463    This can also be set by the command line option: -mat_use_hash_table fact
2464 
2465 .keywords: matrix, hashtable, factor, HT
2466 
2467 .seealso: MatSetOption()
2468 @XXXXX*/
2469 
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2473 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2474 {
2475   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2476   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2477   PetscReal      atmp;
2478   PetscReal      *work,*svalues,*rvalues;
2479   PetscErrorCode ierr;
2480   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2481   PetscMPIInt    rank,size;
2482   PetscInt       *rowners_bs,dest,count,source;
2483   PetscScalar    *va;
2484   MatScalar      *ba;
2485   MPI_Status     stat;
2486 
2487   PetscFunctionBegin;
2488   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2489   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2490   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2491 
2492   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2493   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2494 
2495   bs  = A->rmap->bs;
2496   mbs = a->mbs;
2497   Mbs = a->Mbs;
2498   ba  = b->a;
2499   bi  = b->i;
2500   bj  = b->j;
2501 
2502   /* find ownerships */
2503   rowners_bs = A->rmap->range;
2504 
2505   /* each proc creates an array to be distributed */
2506   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2507   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2508 
2509   /* row_max for B */
2510   if (rank != size-1) {
2511     for (i=0; i<mbs; i++) {
2512       ncols = bi[1] - bi[0]; bi++;
2513       brow  = bs*i;
2514       for (j=0; j<ncols; j++) {
2515         bcol = bs*(*bj);
2516         for (kcol=0; kcol<bs; kcol++) {
2517           col  = bcol + kcol;                /* local col index */
2518           col += rowners_bs[rank+1];      /* global col index */
2519           for (krow=0; krow<bs; krow++) {
2520             atmp = PetscAbsScalar(*ba); ba++;
2521             row  = brow + krow;   /* local row index */
2522             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2523             if (work[col] < atmp) work[col] = atmp;
2524           }
2525         }
2526         bj++;
2527       }
2528     }
2529 
2530     /* send values to its owners */
2531     for (dest=rank+1; dest<size; dest++) {
2532       svalues = work + rowners_bs[dest];
2533       count   = rowners_bs[dest+1]-rowners_bs[dest];
2534       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2535     }
2536   }
2537 
2538   /* receive values */
2539   if (rank) {
2540     rvalues = work;
2541     count   = rowners_bs[rank+1]-rowners_bs[rank];
2542     for (source=0; source<rank; source++) {
2543       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2544       /* process values */
2545       for (i=0; i<count; i++) {
2546         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2547       }
2548     }
2549   }
2550 
2551   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2552   ierr = PetscFree(work);CHKERRQ(ierr);
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 #undef __FUNCT__
2557 #define __FUNCT__ "MatSOR_MPISBAIJ"
2558 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2559 {
2560   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2561   PetscErrorCode    ierr;
2562   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2563   PetscScalar       *x,*ptr,*from;
2564   Vec               bb1;
2565   const PetscScalar *b;
2566 
2567   PetscFunctionBegin;
2568   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2569   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2570 
2571   if (flag == SOR_APPLY_UPPER) {
2572     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2573     PetscFunctionReturn(0);
2574   }
2575 
2576   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2577     if (flag & SOR_ZERO_INITIAL_GUESS) {
2578       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2579       its--;
2580     }
2581 
2582     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2583     while (its--) {
2584 
2585       /* lower triangular part: slvec0b = - B^T*xx */
2586       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2587 
2588       /* copy xx into slvec0a */
2589       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2590       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2591       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2592       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2593 
2594       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2595 
2596       /* copy bb into slvec1a */
2597       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2598       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2599       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2600       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2601 
2602       /* set slvec1b = 0 */
2603       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2604 
2605       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2606       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2607       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2608       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2609 
2610       /* upper triangular part: bb1 = bb1 - B*x */
2611       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2612 
2613       /* local diagonal sweep */
2614       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2615     }
2616     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2617   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2618     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2619   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2620     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2621   } else if (flag & SOR_EISENSTAT) {
2622     Vec               xx1;
2623     PetscBool         hasop;
2624     const PetscScalar *diag;
2625     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2626     PetscInt          i,n;
2627 
2628     if (!mat->xx1) {
2629       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2630       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2631     }
2632     xx1 = mat->xx1;
2633     bb1 = mat->bb1;
2634 
2635     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2636 
2637     if (!mat->diag) {
2638       /* this is wrong for same matrix with new nonzero values */
2639       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2640       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2641     }
2642     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2643 
2644     if (hasop) {
2645       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2646       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2647     } else {
2648       /*
2649           These two lines are replaced by code that may be a bit faster for a good compiler
2650       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2651       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2652       */
2653       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2654       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2655       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2656       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2657       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2658       if (omega == 1.0) {
2659         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2660         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2661       } else {
2662         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2663         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2664       }
2665       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2666       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2667       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2668       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2669     }
2670 
2671     /* multiply off-diagonal portion of matrix */
2672     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2673     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2674     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2675     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2676     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2677     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2678     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2679     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2680     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2681     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2682 
2683     /* local sweep */
2684     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2685     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2686   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2692 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2693 {
2694   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2695   PetscErrorCode ierr;
2696   Vec            lvec1,bb1;
2697 
2698   PetscFunctionBegin;
2699   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2700   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2701 
2702   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2703     if (flag & SOR_ZERO_INITIAL_GUESS) {
2704       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2705       its--;
2706     }
2707 
2708     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2709     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2710     while (its--) {
2711       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2712 
2713       /* lower diagonal part: bb1 = bb - B^T*xx */
2714       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2715       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2716 
2717       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2719       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2720 
2721       /* upper diagonal part: bb1 = bb1 - B*x */
2722       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2723       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2724 
2725       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2726 
2727       /* diagonal sweep */
2728       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2729     }
2730     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2731     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2732   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2738 /*@
2739      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2740          CSR format the local rows.
2741 
2742    Collective on MPI_Comm
2743 
2744    Input Parameters:
2745 +  comm - MPI communicator
2746 .  bs - the block size, only a block size of 1 is supported
2747 .  m - number of local rows (Cannot be PETSC_DECIDE)
2748 .  n - This value should be the same as the local size used in creating the
2749        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2750        calculated if N is given) For square matrices n is almost always m.
2751 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2752 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2753 .   i - row indices
2754 .   j - column indices
2755 -   a - matrix values
2756 
2757    Output Parameter:
2758 .   mat - the matrix
2759 
2760    Level: intermediate
2761 
2762    Notes:
2763        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2764      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2765      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2766 
2767        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2768 
2769 .keywords: matrix, aij, compressed row, sparse, parallel
2770 
2771 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2772           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2773 @*/
2774 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2775 {
2776   PetscErrorCode ierr;
2777 
2778 
2779   PetscFunctionBegin;
2780   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2781   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2782   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2783   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2784   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2785   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2792 /*@C
2793    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2794    (the default parallel PETSc format).
2795 
2796    Collective on MPI_Comm
2797 
2798    Input Parameters:
2799 +  B - the matrix
2800 .  bs - the block size
2801 .  i - the indices into j for the start of each local row (starts with zero)
2802 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2803 -  v - optional values in the matrix
2804 
2805    Level: developer
2806 
2807 .keywords: matrix, aij, compressed row, sparse, parallel
2808 
2809 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2810 @*/
2811 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2812 {
2813   PetscErrorCode ierr;
2814 
2815   PetscFunctionBegin;
2816   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 #undef __FUNCT__
2821 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
2822 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2823 {
2824   PetscErrorCode ierr;
2825   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2826   PetscInt       *indx;
2827   PetscScalar    *values;
2828 
2829   PetscFunctionBegin;
2830   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2831   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2832     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2833     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
2834     PetscInt       *bindx,rmax=a->rmax,j;
2835 
2836     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2837     mbs = m/bs; Nbs = N/cbs;
2838     if (n == PETSC_DECIDE) {
2839       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2840     }
2841     /* Check sum(n) = Nbs */
2842     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2843     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
2844 
2845     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2846     rstart -= mbs;
2847 
2848     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2849     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
2850     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2851     for (i=0; i<mbs; i++) {
2852       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2853       nnz = nnz/bs;
2854       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2855       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2856       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2857     }
2858     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2859     ierr = PetscFree(bindx);CHKERRQ(ierr);
2860 
2861     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2862     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2863     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2864     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2865     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2866     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2867   }
2868 
2869   /* numeric phase */
2870   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2871   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2872 
2873   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2874   for (i=0; i<m; i++) {
2875     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2876     Ii   = i + rstart;
2877     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2878     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2879   }
2880   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2881   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2882   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2883   PetscFunctionReturn(0);
2884 }
2885