/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c */

#include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

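/*
   MatStoreValues_MPISBAIJ() and MatRetrieveValues_MPISBAIJ() below simply
   delegate to the sequential diagonal (A) and off-diagonal (B) blocks.
   A minimal caller-side usage sketch, assuming the nonzero pattern is
   frozen so the value arrays keep their layout:

     MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
     MatStoreValues(mat);        save a snapshot of the numerical values
     ...                         overwrite values, e.g. add a shift
     MatRetrieveValues(mat);     restore the snapshot
*/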
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

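/*
   The two macros below are hand-inlined versions of MatSetValues for one
   scalar entry: they locate block column col/bs within block row row/bs by
   a bisection that narrows the search window to at most three entries
   followed by a linear scan, and fall through to the reallocation path
   only when the entry is not yet present.  The _A_ variant writes into the
   diagonal block (SeqSBAIJ, upper triangle only); the _B_ variant writes
   into the off-diagonal block (SeqBAIJ).
*/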
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
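/*
   A minimal caller-side sketch (bs = 1 for simplicity): only the upper
   triangle needs to be supplied, and with MAT_IGNORE_LOWER_TRIANGULAR set
   any i > j entries are silently dropped instead of raising an error.

     MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
     for (i=rstart; i<rend; i++) {
       for (j=i; j<N; j++) {              upper triangle only
         v = value(i,j);
         MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);
       }
     }
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);

   Here value() stands in for the application's own entry computation.
*/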
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

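/*
   Blocked insertion: each bs x bs input block is staged contiguously in
   the scratch array baij->barray (allocated on first use) so the
   sequential kernels always receive a packed block; the copy is skipped
   when the user's data is already contiguous (a single block column in
   row-oriented mode, or a single block row in column-oriented mode).
*/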
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

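/*
   MatGetValues_MPISBAIJ() serves only locally owned rows.  Columns inside
   the diagonal block are read from A; all others are translated through
   colmap/garray and read from B, and entries outside the stored pattern
   come back as 0.0.
*/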
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

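/*
   For NORM_FROBENIUS only the upper triangle is stored: every entry of the
   off-diagonal block B also represents its transpose mate, so with A_p and
   B_p the local blocks on process p,

       ||M||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 ),

   which matches the PetscSqrtReal(sum[0] + 2*sum[1]) below.  NORM_1 and
   NORM_INFINITY coincide for a symmetric matrix; both are computed as the
   maximum column sum with each off-diagonal entry credited to both its row
   and its column.
*/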
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

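/*
   Assembly is a two-phase stash exchange: MatAssemblyBegin_MPISBAIJ()
   starts communicating the point and block stashes (entries destined for
   rows owned by other processes) after checking that all ranks agree on
   ADD_VALUES versus INSERT_VALUES; MatAssemblyEnd_MPISBAIJ() then drains
   the incoming messages through MatSetValues_MPISBAIJ() and
   MatSetValuesBlocked_MPISBAIJ() before assembling the local A and B
   pieces.
*/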
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

606 
607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
609 #include <petscdraw.h>
610 #undef __FUNCT__
611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
613 {
614   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
615   PetscErrorCode    ierr;
616   PetscInt          bs   = mat->rmap->bs;
617   PetscMPIInt       rank = baij->rank;
618   PetscBool         iascii,isdraw;
619   PetscViewer       sviewer;
620   PetscViewerFormat format;
621 
622   PetscFunctionBegin;
623   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
624   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
625   if (iascii) {
626     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
627     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
628       MatInfo info;
629       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
630       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
633       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
634       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
635       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
636       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
637       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
638       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
640       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_INFO) {
643       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
644       PetscFunctionReturn(0);
645     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
646       PetscFunctionReturn(0);
647     }
648   }
649 
650   if (isdraw) {
651     PetscDraw draw;
652     PetscBool isnull;
653     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
654     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
655   }
656 
657   {
658     /* assemble the entire matrix onto first processor. */
659     Mat          A;
660     Mat_SeqSBAIJ *Aloc;
661     Mat_SeqBAIJ  *Bloc;
662     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
663     MatScalar    *a;
664     const char   *matname;
665 
666     /* Should this be the same type as mat? */
667     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
668     if (!rank) {
669       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
670     } else {
671       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
672     }
673     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
674     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
675     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
676     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
677 
678     /* copy over the A part */
679     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
680     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
681     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
682 
683     for (i=0; i<mbs; i++) {
684       rvals[0] = bs*(baij->rstartbs + i);
685       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
686       for (j=ai[i]; j<ai[i+1]; j++) {
687         col = (baij->cstartbs+aj[j])*bs;
688         for (k=0; k<bs; k++) {
689           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
690           col++;
691           a += bs;
692         }
693       }
694     }
695     /* copy over the B part */
696     Bloc = (Mat_SeqBAIJ*)baij->B->data;
697     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
698     for (i=0; i<mbs; i++) {
699 
700       rvals[0] = bs*(baij->rstartbs + i);
701       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
702       for (j=ai[i]; j<ai[i+1]; j++) {
703         col = baij->garray[aj[j]]*bs;
704         for (k=0; k<bs; k++) {
705           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
706           col++;
707           a += bs;
708         }
709       }
710     }
711     ierr = PetscFree(rvals);CHKERRQ(ierr);
712     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
713     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
714     /*
715        Everyone has to call to draw the matrix since the graphics waits are
716        synchronized across all processors that share the PetscDraw object
717     */
718     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
719     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
722       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
723     }
724     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     ierr = MatDestroy(&A);CHKERRQ(ierr);
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatView_MPISBAIJ"
732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
733 {
734   PetscErrorCode ierr;
735   PetscBool      iascii,isdraw,issocket,isbinary;
736 
737   PetscFunctionBegin;
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
740   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
741   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
742   if (iascii || isdraw || issocket || isbinary) {
743     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatDestroy_MPISBAIJ"
750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
751 {
752   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
753   PetscErrorCode ierr;
754 
755   PetscFunctionBegin;
756 #if defined(PETSC_USE_LOG)
757   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
758 #endif
759   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
760   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
761   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
762   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
763 #if defined(PETSC_USE_CTABLE)
764   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
765 #else
766   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
767 #endif
768   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
770   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
776   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
777   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
778   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
779   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
781   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
782   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
783 #if defined(PETSC_USE_REAL_MAT_SINGLE)
784   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
785 #endif
786   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
788   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
789   ierr = PetscFree(mat->data);CHKERRQ(ierr);
790 
791   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
795   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
796   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
797 #if defined(PETSC_HAVE_ELEMENTAL)
798   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
799 #endif
800   PetscFunctionReturn(0);
801 }
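/*
   Matrix-vector products with symmetric storage: only the upper triangle
   is kept, so conceptually y = (A_loc + B + B^T) x.  The kernels below use
   a single scatter: slvec0 packs the local x followed by B^T x (computed
   by multtranspose); one forward ADD_VALUES scatter simultaneously ships x
   to the processes that need it (arriving in slvec1b) and accumulates the
   transpose contributions onto A_loc*x (held in slvec1a); the product is
   then finished as y = B*slvec1b + slvec1a.  The Hermitian variant differs
   only in applying the conjugate transpose of B; the _2comm variants use
   the traditional two-scatter scheme instead.
*/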
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
806 {
807   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
808   PetscErrorCode ierr;
809   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
810   PetscScalar    *x,*from;
811 
812   PetscFunctionBegin;
813   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
814   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
815 
816   /* diagonal part */
817   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
818   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
819 
820   /* subdiagonal part */
821   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
822 
823   /* copy x into the vec slvec0 */
824   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
826 
827   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
828   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
829   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
830 
831   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833   /* supperdiagonal part */
834   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMult_MPISBAIJ"
840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
841 {
842   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
843   PetscErrorCode    ierr;
844   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
845   PetscScalar       *from;
846   const PetscScalar *x;
847 
848   PetscFunctionBegin;
849   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
850   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
851 
852   /* diagonal part */
853   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
854   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
855 
856   /* subdiagonal part */
857   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
858 
859   /* copy x into the vec slvec0 */
860   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
861   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
862 
863   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
864   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
865   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
866 
867   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   /* supperdiagonal part */
870   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
876 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
877 {
878   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
879   PetscErrorCode ierr;
880   PetscInt       nt;
881 
882   PetscFunctionBegin;
883   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
884   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
885 
886   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
887   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
888 
889   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890   /* do diagonal part */
891   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
892   /* do supperdiagonal part */
893   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
895   /* do subdiagonal part */
896   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
897   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
898   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
904 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
905 {
906   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
907   PetscErrorCode    ierr;
908   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
909   PetscScalar       *from,zero=0.0;
910   const PetscScalar *x;
911 
912   PetscFunctionBegin;
913   /*
914   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
915   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
916   */
917   /* diagonal part */
918   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
919   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
920 
921   /* subdiagonal part */
922   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
923 
924   /* copy x into the vec slvec0 */
925   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
926   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
927   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
928   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
929 
930   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
932   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
933 
934   /* supperdiagonal part */
935   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
941 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
942 {
943   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
944   PetscErrorCode ierr;
945 
946   PetscFunctionBegin;
947   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   /* do diagonal part */
949   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
950   /* do supperdiagonal part */
951   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
953 
954   /* do subdiagonal part */
955   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
956   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
957   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 /*
962   This only works correctly for square matrices where the subblock A->A is the
963    diagonal block
964 */
965 #undef __FUNCT__
966 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
967 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
968 {
969   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
974   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatScale_MPISBAIJ"
980 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
981 {
982   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
987   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
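/*
   MatGetRow_MPISBAIJ() reconstructs one global row from the two local
   pieces: the row of A (local columns shifted by cstart) and the row of B
   (compact column indices translated to global ones through garray).
   Both sequential rows are already sorted, so the loops below merge them
   in increasing global column order around the split point imark.
*/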
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1075 {
1076   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1077 
1078   PetscFunctionBegin;
1079   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1080   baij->getrowactive = PETSC_FALSE;
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1086 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1087 {
1088   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1089   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1090 
1091   PetscFunctionBegin;
1092   aA->getrow_utriangular = PETSC_TRUE;
1093   PetscFunctionReturn(0);
1094 }
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1097 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1098 {
1099   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1100   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1101 
1102   PetscFunctionBegin;
1103   aA->getrow_utriangular = PETSC_FALSE;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1109 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1110 {
1111   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1116   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1122 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1123 {
1124   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1129   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1135 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1136 {
1137   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1142   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1148 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1149 {
1150   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1151   Mat            A  = a->A,B = a->B;
1152   PetscErrorCode ierr;
1153   PetscReal      isend[5],irecv[5];
1154 
1155   PetscFunctionBegin;
1156   info->block_size = (PetscReal)matin->rmap->bs;
1157 
1158   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1159 
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162 
1163   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1164 
1165   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1166   isend[3] += info->memory;  isend[4] += info->mallocs;
1167   if (flag == MAT_LOCAL) {
1168     info->nz_used      = isend[0];
1169     info->nz_allocated = isend[1];
1170     info->nz_unneeded  = isend[2];
1171     info->memory       = isend[3];
1172     info->mallocs      = isend[4];
1173   } else if (flag == MAT_GLOBAL_MAX) {
1174     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1175 
1176     info->nz_used      = irecv[0];
1177     info->nz_allocated = irecv[1];
1178     info->nz_unneeded  = irecv[2];
1179     info->memory       = irecv[3];
1180     info->mallocs      = irecv[4];
1181   } else if (flag == MAT_GLOBAL_SUM) {
1182     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1183 
1184     info->nz_used      = irecv[0];
1185     info->nz_allocated = irecv[1];
1186     info->nz_unneeded  = irecv[2];
1187     info->memory       = irecv[3];
1188     info->mallocs      = irecv[4];
1189   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1190   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1191   info->fill_ratio_needed = 0;
1192   info->factor_mallocs    = 0;
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1198 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1199 {
1200   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1201   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NEW_NONZERO_LOCATIONS:
1207   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1208   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1209   case MAT_KEEP_NONZERO_PATTERN:
1210   case MAT_NEW_NONZERO_LOCATION_ERR:
1211     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1212     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1213     break;
1214   case MAT_ROW_ORIENTED:
1215     a->roworiented = flg;
1216 
1217     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1219     break;
1220   case MAT_NEW_DIAGONALS:
1221     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1222     break;
1223   case MAT_IGNORE_OFF_PROC_ENTRIES:
1224     a->donotstash = flg;
1225     break;
1226   case MAT_USE_HASH_TABLE:
1227     a->ht_flag = flg;
1228     break;
1229   case MAT_HERMITIAN:
1230     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1231     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1232 
1233     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1234     break;
1235   case MAT_SPD:
1236     A->spd_set = PETSC_TRUE;
1237     A->spd     = flg;
1238     if (flg) {
1239       A->symmetric                  = PETSC_TRUE;
1240       A->structurally_symmetric     = PETSC_TRUE;
1241       A->symmetric_set              = PETSC_TRUE;
1242       A->structurally_symmetric_set = PETSC_TRUE;
1243     }
1244     break;
1245   case MAT_SYMMETRIC:
1246     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1247     break;
1248   case MAT_STRUCTURALLY_SYMMETRIC:
1249     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1250     break;
1251   case MAT_SYMMETRY_ETERNAL:
1252     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1253     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1254     break;
1255   case MAT_IGNORE_LOWER_TRIANGULAR:
1256     aA->ignore_ltriangular = flg;
1257     break;
1258   case MAT_ERROR_LOWER_TRIANGULAR:
1259     aA->ignore_ltriangular = flg;
1260     break;
1261   case MAT_GETROW_UPPERTRIANGULAR:
1262     aA->getrow_utriangular = flg;
1263     break;
1264   default:
1265     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1272 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1273 {
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277   if (MAT_INITIAL_MATRIX || *B != A) {
1278     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1279   }
1280   PetscFunctionReturn(0);
1281 }
1282 
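/*
   Only symmetric scaling M <- diag(ll) M diag(ll) is supported: with just
   the upper triangle stored, distinct left and right factors would break
   the symmetry the format relies on, hence the VecEqual() guard below.
   The diagonal block is scaled on both sides directly; the off-diagonal
   block gets its left factor locally and its right factor from the
   scattered ghost values in baij->lvec.
*/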
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPISBAIJ"
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1389 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1390 {
1391   PetscErrorCode ierr;
1392   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1393   PetscBLASInt   bnz,one=1;
1394   Mat_SeqSBAIJ   *xa,*ya;
1395   Mat_SeqBAIJ    *xb,*yb;
1396 
1397   PetscFunctionBegin;
1398   if (str == SAME_NONZERO_PATTERN) {
1399     PetscScalar alpha = a;
1400     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1401     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1402     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1403     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1404     xb   = (Mat_SeqBAIJ*)xx->B->data;
1405     yb   = (Mat_SeqBAIJ*)yy->B->data;
1406     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1407     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1408     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1409   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1410     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1411     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1412     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1413   } else {
1414     Mat      B;
1415     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1416     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1417     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1418     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1419     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1420     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1421     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1422     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1423     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1424     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1425     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1426     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1427     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1428     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1429     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1430     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1431     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1432     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1433     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1434     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
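/*
   Usage sketch for the routine above (illustrative only, not part of the library;
   assumes X and Y are assembled MATMPISBAIJ matrices with the same block size):

     PetscScalar alpha = 2.0;
     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

   With SAME_NONZERO_PATTERN the values are combined directly with BLASaxpy();
   with DIFFERENT_NONZERO_PATTERN a new matrix with the merged preallocation is
   built and swapped into Y via MatHeaderReplace().
*/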
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1441 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1442 {
1443   PetscErrorCode ierr;
1444   PetscInt       i;
1445   PetscBool      flg;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1449   for (i=0; i<n; i++) {
1450     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1451     if (!flg) { /* *B[i] is non-symmetric, set flag */
1452       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1453     }
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 /* -------------------------------------------------------------------*/
1459 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1460                                        MatGetRow_MPISBAIJ,
1461                                        MatRestoreRow_MPISBAIJ,
1462                                        MatMult_MPISBAIJ,
1463                                /*  4*/ MatMultAdd_MPISBAIJ,
1464                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1465                                        MatMultAdd_MPISBAIJ,
1466                                        0,
1467                                        0,
1468                                        0,
1469                                /* 10*/ 0,
1470                                        0,
1471                                        0,
1472                                        MatSOR_MPISBAIJ,
1473                                        MatTranspose_MPISBAIJ,
1474                                /* 15*/ MatGetInfo_MPISBAIJ,
1475                                        MatEqual_MPISBAIJ,
1476                                        MatGetDiagonal_MPISBAIJ,
1477                                        MatDiagonalScale_MPISBAIJ,
1478                                        MatNorm_MPISBAIJ,
1479                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1480                                        MatAssemblyEnd_MPISBAIJ,
1481                                        MatSetOption_MPISBAIJ,
1482                                        MatZeroEntries_MPISBAIJ,
1483                                /* 24*/ 0,
1484                                        0,
1485                                        0,
1486                                        0,
1487                                        0,
1488                                /* 29*/ MatSetUp_MPISBAIJ,
1489                                        0,
1490                                        0,
1491                                        0,
1492                                        0,
1493                                /* 34*/ MatDuplicate_MPISBAIJ,
1494                                        0,
1495                                        0,
1496                                        0,
1497                                        0,
1498                                /* 39*/ MatAXPY_MPISBAIJ,
1499                                        MatGetSubMatrices_MPISBAIJ,
1500                                        MatIncreaseOverlap_MPISBAIJ,
1501                                        MatGetValues_MPISBAIJ,
1502                                        MatCopy_MPISBAIJ,
1503                                /* 44*/ 0,
1504                                        MatScale_MPISBAIJ,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                /* 49*/ 0,
1509                                        0,
1510                                        0,
1511                                        0,
1512                                        0,
1513                                /* 54*/ 0,
1514                                        0,
1515                                        MatSetUnfactored_MPISBAIJ,
1516                                        0,
1517                                        MatSetValuesBlocked_MPISBAIJ,
1518                                /* 59*/ 0,
1519                                        0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                /* 64*/ 0,
1524                                        0,
1525                                        0,
1526                                        0,
1527                                        0,
1528                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1529                                        0,
1530                                        0,
1531                                        0,
1532                                        0,
1533                                /* 74*/ 0,
1534                                        0,
1535                                        0,
1536                                        0,
1537                                        0,
1538                                /* 79*/ 0,
1539                                        0,
1540                                        0,
1541                                        0,
1542                                        MatLoad_MPISBAIJ,
1543                                /* 84*/ 0,
1544                                        0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                /* 89*/ 0,
1549                                        0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                /* 94*/ 0,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                /* 99*/ 0,
1559                                        0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                /*104*/ 0,
1564                                        MatRealPart_MPISBAIJ,
1565                                        MatImaginaryPart_MPISBAIJ,
1566                                        MatGetRowUpperTriangular_MPISBAIJ,
1567                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1568                                /*109*/ 0,
1569                                        0,
1570                                        0,
1571                                        0,
1572                                        0,
1573                                /*114*/ 0,
1574                                        0,
1575                                        0,
1576                                        0,
1577                                        0,
1578                                /*119*/ 0,
1579                                        0,
1580                                        0,
1581                                        0,
1582                                        0,
1583                                /*124*/ 0,
1584                                        0,
1585                                        0,
1586                                        0,
1587                                        0,
1588                                /*129*/ 0,
1589                                        0,
1590                                        0,
1591                                        0,
1592                                        0,
1593                                /*134*/ 0,
1594                                        0,
1595                                        0,
1596                                        0,
1597                                        0,
1598                                /*139*/ 0,
1599                                        0,
1600                                        0,
1601                                        0,
1602                                        0,
1603                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1604 };
1605 
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1608 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1609 {
1610   PetscFunctionBegin;
1611   *a = ((Mat_MPISBAIJ*)A->data)->A;
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1617 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1618 {
1619   Mat_MPISBAIJ   *b;
1620   PetscErrorCode ierr;
1621   PetscInt       i,mbs,Mbs;
1622 
1623   PetscFunctionBegin;
1624   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1625   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1626   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1627   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1628 
1629   b   = (Mat_MPISBAIJ*)B->data;
1630   mbs = B->rmap->n/bs;
1631   Mbs = B->rmap->N/bs;
1632   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1633 
1634   B->rmap->bs = bs;
1635   b->bs2      = bs*bs;
1636   b->mbs      = mbs;
1637   b->Mbs      = Mbs;
1638   b->nbs      = B->cmap->n/bs;
1639   b->Nbs      = B->cmap->N/bs;
1640 
1641   for (i=0; i<=b->size; i++) {
1642     b->rangebs[i] = B->rmap->range[i]/bs;
1643   }
1644   b->rstartbs = B->rmap->rstart/bs;
1645   b->rendbs   = B->rmap->rend/bs;
1646 
1647   b->cstartbs = B->cmap->rstart/bs;
1648   b->cendbs   = B->cmap->rend/bs;
1649 
1650   if (!B->preallocated) {
1651     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1652     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1653     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1654     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1655     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1656     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1657     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1658     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1659     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1660   }
1661 
1662   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1663   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1664 
1665   B->preallocated = PETSC_TRUE;
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1671 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1672 {
1673   PetscInt       m,rstart,cstart,cend;
1674   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1675   const PetscInt *JJ    =0;
1676   PetscScalar    *values=0;
1677   PetscErrorCode ierr;
1678 
1679   PetscFunctionBegin;
1680   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1681   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1682   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1683   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1684   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1685   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1686   m      = B->rmap->n/bs;
1687   rstart = B->rmap->rstart/bs;
1688   cstart = B->cmap->rstart/bs;
1689   cend   = B->cmap->rend/bs;
1690 
1691   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1692   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1693   for (i=0; i<m; i++) {
1694     nz = ii[i+1] - ii[i];
1695     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1696     nz_max = PetscMax(nz_max,nz);
1697     JJ     = jj + ii[i];
1698     for (j=0; j<nz; j++) {
1699       if (*JJ >= cstart) break;
1700       JJ++;
1701     }
1702     d = 0;
1703     for (; j<nz; j++) {
1704       if (*JJ++ >= cend) break;
1705       d++;
1706     }
1707     d_nnz[i] = d;
1708     o_nnz[i] = nz - d;
1709   }
1710   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1711   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1712 
1713   values = (PetscScalar*)V;
1714   if (!values) {
1715     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1716     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1717   }
1718   for (i=0; i<m; i++) {
1719     PetscInt          row    = i + rstart;
1720     PetscInt          ncols  = ii[i+1] - ii[i];
1721     const PetscInt    *icols = jj + ii[i];
1722     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1723     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1724   }
1725 
1726   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1727   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1728   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1729   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1730   PetscFunctionReturn(0);
1731 }
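/*
   Example of the CSR input expected above (an illustrative sketch, not part of
   the library). On a process owning block rows 0 and 1 with bs = 2, where block
   row 0 has blocks in global block columns 0 and 2 and block row 1 has a single
   block in block column 1:

     PetscInt    ii[] = {0,2,3};   (row offsets; ii[0] must be 0)
     PetscInt    jj[] = {0,2,1};   (block column indices, rows concatenated)
     PetscScalar V[12];            (bs*bs values per block, or NULL)
     ierr = MatMPISBAIJSetPreallocationCSR(B,2,ii,jj,V);CHKERRQ(ierr);

   Passing V = NULL preallocates and inserts zero blocks, as the code above does.
*/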
1732 
1733 /*MC
1734    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1735    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1736    the matrix is stored.
1737 
1738   For complex numbers this matrix is by default symmetric, NOT Hermitian. To make it Hermitian you
1739   can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE) after the matrix has been assembled.
1740 
1741    Options Database Keys:
1742 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1743 
1744   Level: beginner
1745 
1746 .seealso: MatCreateSBAIJ()
1747 M*/
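/*
   Illustrative sketch (not part of the library): making an assembled complex
   MATMPISBAIJ matrix behave as Hermitian rather than symmetric:

     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);

   As MatSetOption_MPISBAIJ() above enforces, MAT_HERMITIAN can only be set after
   assembly; it switches the MatMult implementation to the Hermitian variant.
*/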
1748 
1749 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1750 
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatCreate_MPISBAIJ"
1753 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1754 {
1755   Mat_MPISBAIJ   *b;
1756   PetscErrorCode ierr;
1757   PetscBool      flg = PETSC_FALSE;
1758 
1759   PetscFunctionBegin;
1760   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1761   B->data = (void*)b;
1762   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1763 
1764   B->ops->destroy = MatDestroy_MPISBAIJ;
1765   B->ops->view    = MatView_MPISBAIJ;
1766   B->assembled    = PETSC_FALSE;
1767   B->insertmode   = NOT_SET_VALUES;
1768 
1769   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1770   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1771 
1772   /* build local table of row and column ownerships */
1773   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1774 
1775   /* build cache for off array entries formed */
1776   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1777 
1778   b->donotstash  = PETSC_FALSE;
1779   b->colmap      = NULL;
1780   b->garray      = NULL;
1781   b->roworiented = PETSC_TRUE;
1782 
1783   /* stuff used in block assembly */
1784   b->barray = 0;
1785 
1786   /* stuff used for matrix vector multiply */
1787   b->lvec    = 0;
1788   b->Mvctx   = 0;
1789   b->slvec0  = 0;
1790   b->slvec0b = 0;
1791   b->slvec1  = 0;
1792   b->slvec1a = 0;
1793   b->slvec1b = 0;
1794   b->sMvctx  = 0;
1795 
1796   /* stuff for MatGetRow() */
1797   b->rowindices   = 0;
1798   b->rowvalues    = 0;
1799   b->getrowactive = PETSC_FALSE;
1800 
1801   /* hash table stuff */
1802   b->ht           = 0;
1803   b->hd           = 0;
1804   b->ht_size      = 0;
1805   b->ht_flag      = PETSC_FALSE;
1806   b->ht_fact      = 0;
1807   b->ht_total_ct  = 0;
1808   b->ht_insert_ct = 0;
1809 
1810   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1811   b->ijonly = PETSC_FALSE;
1812 
1813   b->in_loc = 0;
1814   b->v_loc  = 0;
1815   b->n_loc  = 0;
1816 
1817   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1819   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1820   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1822   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1823 #if defined(PETSC_HAVE_ELEMENTAL)
1824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
1825 #endif
1826 
1827   B->symmetric                  = PETSC_TRUE;
1828   B->structurally_symmetric     = PETSC_TRUE;
1829   B->symmetric_set              = PETSC_TRUE;
1830   B->structurally_symmetric_set = PETSC_TRUE;
1831 
1832   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1833   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1834   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1835   if (flg) {
1836     PetscReal fact = 1.39;
1837     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1838     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1839     if (fact <= 1.0) fact = 1.39;
1840     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1841     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1842   }
1843   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 /*MC
1848    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1849 
1850    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1851    and MATMPISBAIJ otherwise.
1852 
1853    Options Database Keys:
1854 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1855 
1856   Level: beginner
1857 
1858 .seealso: MatCreateSBAIJ(),MATSEQSBAIJ,MATMPISBAIJ
1859 M*/
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1863 /*@C
1864    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1865    the user should preallocate the matrix storage by setting the parameters
1866    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1867    performance can be increased by more than a factor of 50.
1868 
1869    Collective on Mat
1870 
1871    Input Parameters:
1872 +  B - the matrix
1873 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1874           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1875 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1876            submatrix  (same for all local rows)
1877 .  d_nnz - array containing the number of block nonzeros in the various block rows
1878            in the upper triangular and diagonal part of the diagonal portion of the local
1879            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1880            for the diagonal entry and set a value even if it is zero.
1881 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1882            submatrix (same for all local rows).
1883 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1884            off-diagonal portion of the local submatrix that lies to the right of the diagonal
1885            (possibly different for each block row) or NULL.
1886 
1887 
1888    Options Database Keys:
1889 .   -mat_no_unroll - uses code that does not unroll the loops in the
1890                      block calculations (much slower)
1891 .   -mat_block_size - size of the blocks to use
1892 
1893    Notes:
1894 
1895    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1896    then it must be used on all processors that share the object for that argument.
1897 
1898    If the *_nnz parameter is given then the *_nz parameter is ignored
1899 
1900    Storage Information:
1901    For a square global matrix we define each processor's diagonal portion
1902    to be its local rows and the corresponding columns (a square submatrix);
1903    each processor's off-diagonal portion encompasses the remainder of the
1904    local matrix (a rectangular submatrix).
1905 
1906    The user can specify preallocated storage for the diagonal part of
1907    the local submatrix with either d_nz or d_nnz (not both).  Set
1908    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1909    memory allocation.  Likewise, specify preallocated storage for the
1910    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1911 
1912    You can call MatGetInfo() to get information on how effective the preallocation was;
1913    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1914    You can also run with the option -info and look for messages with the string
1915    malloc in them to see if additional memory allocation was needed.
1916 
1917    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1918    the figure below we depict these three local rows and all columns (0-11).
1919 
1920 .vb
1921            0 1 2 3 4 5 6 7 8 9 10 11
1922           --------------------------
1923    row 3  |. . . d d d o o o o  o  o
1924    row 4  |. . . d d d o o o o  o  o
1925    row 5  |. . . d d d o o o o  o  o
1926           --------------------------
1927 .ve
1928 
1929    Thus, any entries in the d locations are stored in the d (diagonal)
1930    submatrix, and any entries in the o locations are stored in the
1931    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1932    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1933 
1934    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1935    plus the diagonal part of the d matrix,
1936    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1937 
1938    In general, for PDE problems in which most nonzeros are near the diagonal,
1939    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1940    or you will get TERRIBLE performance; see the users' manual chapter on
1941    matrices.
1942 
1943    Level: intermediate
1944 
1945 .keywords: matrix, block, aij, compressed row, sparse, parallel
1946 
1947 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1948 @*/
1949 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1950 {
1951   PetscErrorCode ierr;
1952 
1953   PetscFunctionBegin;
1954   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1955   PetscValidType(B,1);
1956   PetscValidLogicalCollectiveInt(B,bs,2);
1957   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
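/*
   Preallocation sketch (illustrative only): for the three-row layout pictured in
   the manual page above with bs = 1, each local row has at most 3 stored entries
   in the diagonal portion (upper triangle plus diagonal) and 6 in the
   off-diagonal portion, so one might call

     ierr = MatMPISBAIJSetPreallocation(B,1,3,NULL,6,NULL);CHKERRQ(ierr);

   Only the upper triangle (plus the diagonal) of the diagonal portion is stored,
   so d_nz counts just those entries.
*/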
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatCreateSBAIJ"
1963 /*@C
1964    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1965    (block compressed row).  For good matrix assembly performance
1966    the user should preallocate the matrix storage by setting the parameters
1967    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1968    performance can be increased by more than a factor of 50.
1969 
1970    Collective on MPI_Comm
1971 
1972    Input Parameters:
1973 +  comm - MPI communicator
1974 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1975           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1976 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1977            This value should be the same as the local size used in creating the
1978            y vector for the matrix-vector product y = Ax.
1979 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1980            This value should be the same as the local size used in creating the
1981            x vector for the matrix-vector product y = Ax.
1982 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1983 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1984 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1985            submatrix  (same for all local rows)
1986 .  d_nnz - array containing the number of block nonzeros in the various block rows
1987            in the upper triangular portion of the diagonal portion of the local
1988            submatrix (possibly different for each block row) or NULL.
1989            If you plan to factor the matrix you must leave room for the diagonal entry and
1990            set its value even if it is zero.
1991 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1992            submatrix (same for all local rows).
1993 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1994            off-diagonal portion of the local submatrix (possibly different for
1995            each block row) or NULL.
1996 
1997    Output Parameter:
1998 .  A - the matrix
1999 
2000    Options Database Keys:
2001 .   -mat_no_unroll - uses code that does not unroll the loops in the
2002                      block calculations (much slower)
2003 .   -mat_block_size - size of the blocks to use
2004 .   -mat_mpi - use the parallel matrix data structures even on one processor
2005                (defaults to using SeqSBAIJ format on one processor)
2006 
2007    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2008    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2009    [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
2010 
2011    Notes:
2012    The number of rows and columns must be divisible by blocksize.
2013    This matrix type does not support complex Hermitian operations.
2014 
2015    The user MUST specify either the local or global matrix dimensions
2016    (possibly both).
2017 
2018    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2019    then it must be used on all processors that share the object for that argument.
2020 
2021    If the *_nnz parameter is given then the *_nz parameter is ignored
2022 
2023    Storage Information:
2024    For a square global matrix we define each processor's diagonal portion
2025    to be its local rows and the corresponding columns (a square submatrix);
2026    each processor's off-diagonal portion encompasses the remainder of the
2027    local matrix (a rectangular submatrix).
2028 
2029    The user can specify preallocated storage for the diagonal part of
2030    the local submatrix with either d_nz or d_nnz (not both).  Set
2031    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2032    memory allocation.  Likewise, specify preallocated storage for the
2033    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2034 
2035    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2036    the figure below we depict these three local rows and all columns (0-11).
2037 
2038 .vb
2039            0 1 2 3 4 5 6 7 8 9 10 11
2040           --------------------------
2041    row 3  |. . . d d d o o o o  o  o
2042    row 4  |. . . d d d o o o o  o  o
2043    row 5  |. . . d d d o o o o  o  o
2044           --------------------------
2045 .ve
2046 
2047    Thus, any entries in the d locations are stored in the d (diagonal)
2048    submatrix, and any entries in the o locations are stored in the
2049    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2050    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2051 
2052    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2053    plus the diagonal part of the d matrix,
2054    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2055    In general, for PDE problems in which most nonzeros are near the diagonal,
2056    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2057    or you will get TERRIBLE performance; see the users' manual chapter on
2058    matrices.
2059 
2060    Level: intermediate
2061 
2062 .keywords: matrix, block, aij, compressed row, sparse, parallel
2063 
2064 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2065 @*/
2066 
2067 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2068 {
2069   PetscErrorCode ierr;
2070   PetscMPIInt    size;
2071 
2072   PetscFunctionBegin;
2073   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2074   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2075   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2076   if (size > 1) {
2077     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2078     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2079   } else {
2080     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2081     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2082   }
2083   PetscFunctionReturn(0);
2084 }
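/*
   Creation sketch (illustrative only): an 8x8 global SBAIJ matrix with 2x2
   blocks, letting PETSc choose the local sizes and using uniform preallocation:

     Mat A;
     ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,8,8,
                           2,NULL,1,NULL,&A);CHKERRQ(ierr);

   As the body above shows, this yields a MATSEQSBAIJ matrix on one process and
   a MATMPISBAIJ matrix on several.
*/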
2085 
2086 
2087 #undef __FUNCT__
2088 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2089 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2090 {
2091   Mat            mat;
2092   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2093   PetscErrorCode ierr;
2094   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2095   PetscScalar    *array;
2096 
2097   PetscFunctionBegin;
2098   *newmat = 0;
2099 
2100   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2101   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2102   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2103   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2104   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2105   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2106 
2107   mat->factortype   = matin->factortype;
2108   mat->preallocated = PETSC_TRUE;
2109   mat->assembled    = PETSC_TRUE;
2110   mat->insertmode   = NOT_SET_VALUES;
2111 
2112   a      = (Mat_MPISBAIJ*)mat->data;
2113   a->bs2 = oldmat->bs2;
2114   a->mbs = oldmat->mbs;
2115   a->nbs = oldmat->nbs;
2116   a->Mbs = oldmat->Mbs;
2117   a->Nbs = oldmat->Nbs;
2118 
2119 
2120   a->size         = oldmat->size;
2121   a->rank         = oldmat->rank;
2122   a->donotstash   = oldmat->donotstash;
2123   a->roworiented  = oldmat->roworiented;
2124   a->rowindices   = 0;
2125   a->rowvalues    = 0;
2126   a->getrowactive = PETSC_FALSE;
2127   a->barray       = 0;
2128   a->rstartbs     = oldmat->rstartbs;
2129   a->rendbs       = oldmat->rendbs;
2130   a->cstartbs     = oldmat->cstartbs;
2131   a->cendbs       = oldmat->cendbs;
2132 
2133   /* hash table stuff */
2134   a->ht           = 0;
2135   a->hd           = 0;
2136   a->ht_size      = 0;
2137   a->ht_flag      = oldmat->ht_flag;
2138   a->ht_fact      = oldmat->ht_fact;
2139   a->ht_total_ct  = 0;
2140   a->ht_insert_ct = 0;
2141 
2142   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2143   if (oldmat->colmap) {
2144 #if defined(PETSC_USE_CTABLE)
2145     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2146 #else
2147     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2148     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2149     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2150 #endif
2151   } else a->colmap = 0;
2152 
2153   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2154     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2155     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2156     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2157   } else a->garray = 0;
2158 
2159   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2160   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2161   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2162   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2163   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2164 
2165   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2166   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2167   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2168   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2169 
2170   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2171   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2172   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2173   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2174   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2175   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2176   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2177   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2178   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2179   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2180   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2181   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2182   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2183 
2184   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2185   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2186   a->sMvctx = oldmat->sMvctx;
2187   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2188 
2189   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2190   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2191   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2192   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2193   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2194   *newmat = mat;
2195   PetscFunctionReturn(0);
2196 }
2197 
2198 #undef __FUNCT__
2199 #define __FUNCT__ "MatLoad_MPISBAIJ"
2200 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2201 {
2202   PetscErrorCode ierr;
2203   PetscInt       i,nz,j,rstart,rend;
2204   PetscScalar    *vals,*buf;
2205   MPI_Comm       comm;
2206   MPI_Status     status;
2207   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2208   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2209   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2210   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2211   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2212   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2213   int            fd;
2214 
2215   PetscFunctionBegin;
2216   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2217   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2218   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2219   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2220   if (bs < 0) bs = 1;
2221 
2222   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2223   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2224   if (!rank) {
2225     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2226     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2227     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2228     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2229   }
2230 
2231   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2232 
2233   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2234   M    = header[1];
2235   N    = header[2];
2236 
2237   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2238   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2239   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2240 
2241   /* If global sizes are set, check if they are consistent with that given in the file */
2242   if (sizesset) {
2243     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2244   }
2245   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
2246   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2247 
2248   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2249 
2250   /*
2251      This code adds extra rows to make sure the number of rows is
2252      divisible by the blocksize
2253   */
2254   Mbs        = M/bs;
2255   extra_rows = bs - M + bs*(Mbs);
2256   if (extra_rows == bs) extra_rows = 0;
2257   else                  Mbs++;
2258   if (extra_rows && !rank) {
2259     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2260   }
2261 
2262   /* determine ownership of all rows */
2263   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2264     mbs = Mbs/size + ((Mbs % size) > rank);
2265     m   = mbs*bs;
2266   } else { /* User Set */
2267     m   = newmat->rmap->n;
2268     mbs = m/bs;
2269   }
2270   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2271   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2272   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2273   rowners[0] = 0;
2274   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2275   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2276   rstart = rowners[rank];
2277   rend   = rowners[rank+1];
2278 
2279   /* distribute row lengths to all processors */
2280   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2281   if (!rank) {
2282     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2283     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2284     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2285     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2286     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2287     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2288     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2289   } else {
2290     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2291   }
2292 
2293   if (!rank) {   /* procs[0] */
2294     /* calculate the number of nonzeros on each processor */
2295     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2296     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2297     for (i=0; i<size; i++) {
2298       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2299         procsnz[i] += rowlengths[j];
2300       }
2301     }
2302     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2303 
2304     /* determine max buffer needed and allocate it */
2305     maxnz = 0;
2306     for (i=0; i<size; i++) {
2307       maxnz = PetscMax(maxnz,procsnz[i]);
2308     }
2309     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2310 
2311     /* read in my part of the matrix column indices  */
2312     nz     = procsnz[0];
2313     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2314     mycols = ibuf;
2315     if (size == 1) nz -= extra_rows;
2316     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2317     if (size == 1) {
2318       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2319     }
2320 
2321     /* read in each other processor's part (except the last) and ship it off */
2322     for (i=1; i<size-1; i++) {
2323       nz   = procsnz[i];
2324       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2325       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2326     }
2327     /* read in the stuff for the last proc */
2328     if (size != 1) {
2329       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2330       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2331       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2332       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2333     }
2334     ierr = PetscFree(cols);CHKERRQ(ierr);
2335   } else {  /* procs[i], i>0 */
2336     /* determine buffer space needed for message */
2337     nz = 0;
2338     for (i=0; i<m; i++) nz += locrowlens[i];
2339     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2340     mycols = ibuf;
2341     /* receive message of column indices*/
2342     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2343     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2344     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2345   }
2346 
2347   /* loop over local rows, determining number of off diagonal entries */
2348   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2349   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2350   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2351   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2352   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2353   rowcount = 0;
2354   nzcount  = 0;
2355   for (i=0; i<mbs; i++) {
2356     dcount  = 0;
2357     odcount = 0;
2358     for (j=0; j<bs; j++) {
2359       kmax = locrowlens[rowcount];
2360       for (k=0; k<kmax; k++) {
2361         tmp = mycols[nzcount++]/bs; /* block col. index */
2362         if (!mask[tmp]) {
2363           mask[tmp] = 1;
2364           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2365           else masked1[dcount++] = tmp; /* entry in diag portion */
2366         }
2367       }
2368       rowcount++;
2369     }
2370 
2371     dlens[i]  = dcount;  /* d_nnz[i] */
2372     odlens[i] = odcount; /* o_nnz[i] */
2373 
2374     /* zero out the mask elements we set */
2375     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2376     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2377   }
2378   if (!sizesset) {
2379     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2380   }
2381   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2382   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2383 
2384   if (!rank) {
2385     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2386     /* read in my part of the matrix numerical values  */
2387     nz     = procsnz[0];
2388     vals   = buf;
2389     mycols = ibuf;
2390     if (size == 1) nz -= extra_rows;
2391     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2392     if (size == 1) {
2393       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2394     }
2395 
2396     /* insert into matrix */
2397     jj = rstart*bs;
2398     for (i=0; i<m; i++) {
2399       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2400       mycols += locrowlens[i];
2401       vals   += locrowlens[i];
2402       jj++;
2403     }
2404 
2405     /* read in other processors (except the last one) and ship out */
2406     for (i=1; i<size-1; i++) {
2407       nz   = procsnz[i];
2408       vals = buf;
2409       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2410       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2411     }
2412     /* the last proc */
2413     if (size != 1) {
2414       nz   = procsnz[i] - extra_rows;
2415       vals = buf;
2416       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2417       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2418       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2419     }
2420     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2421 
2422   } else {
2423     /* receive numeric values */
2424     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2425 
2426     /* receive message of values*/
2427     vals   = buf;
2428     mycols = ibuf;
2429     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2430     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2431     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2432 
2433     /* insert into matrix */
2434     jj = rstart*bs;
2435     for (i=0; i<m; i++) {
2436       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2437       mycols += locrowlens[i];
2438       vals   += locrowlens[i];
2439       jj++;
2440     }
2441   }
2442 
2443   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2444   ierr = PetscFree(buf);CHKERRQ(ierr);
2445   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2446   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2447   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2448   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2449   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2450   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
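/*
   Layout of the PETSc binary file read above (a summary, not a specification):
   four header PetscInts (MAT_FILE_CLASSID, M, N, total number of nonzeros; a
   negative nonzero count marks a special format rejected above), followed by
   the M row lengths, then all column indices, then all numerical values. The
   loader pads the matrix with identity rows when M is not divisible by the
   chosen block size.
*/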
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2456 /*XXXXX@
2457    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2458 
2459    Input Parameters:
2460 +  mat  - the matrix
2461 -  fact - factor
2462 
2463    Not Collective on Mat, each process can have a different hash factor
2464 
2465    Level: advanced
2466 
2467   Notes:
2468    This can also be set by the command line option: -mat_use_hash_table fact
2469 
2470 .keywords: matrix, hashtable, factor, HT
2471 
2472 .seealso: MatSetOption()
2473 @XXXXX*/
2474 
2475 
2476 #undef __FUNCT__
2477 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2478 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2479 {
2480   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2481   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2482   PetscReal      atmp;
2483   PetscReal      *work,*svalues,*rvalues;
2484   PetscErrorCode ierr;
2485   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2486   PetscMPIInt    rank,size;
2487   PetscInt       *rowners_bs,dest,count,source;
2488   PetscScalar    *va;
2489   MatScalar      *ba;
2490   MPI_Status     stat;
2491 
2492   PetscFunctionBegin;
2493   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2494   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2495   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2496 
2497   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2498   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2499 
2500   bs  = A->rmap->bs;
2501   mbs = a->mbs;
2502   Mbs = a->Mbs;
2503   ba  = b->a;
2504   bi  = b->i;
2505   bj  = b->j;
2506 
2507   /* find ownerships */
2508   rowners_bs = A->rmap->range;
2509 
2510   /* each proc creates an array to be distributed */
2511   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2512   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2513 
2514   /* row_max for B */
2515   if (rank != size-1) {
2516     for (i=0; i<mbs; i++) {
2517       ncols = bi[1] - bi[0]; bi++;
2518       brow  = bs*i;
2519       for (j=0; j<ncols; j++) {
2520         bcol = bs*(*bj);
2521         for (kcol=0; kcol<bs; kcol++) {
2522           col  = bcol + kcol;                /* local col index */
2523           col += rowners_bs[rank+1];      /* global col index */
2524           for (krow=0; krow<bs; krow++) {
2525             atmp = PetscAbsScalar(*ba); ba++;
2526             row  = brow + krow;   /* local row index */
2527             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2528             if (work[col] < atmp) work[col] = atmp;
2529           }
2530         }
2531         bj++;
2532       }
2533     }
2534 
2535     /* send values to its owners */
2536     for (dest=rank+1; dest<size; dest++) {
2537       svalues = work + rowners_bs[dest];
2538       count   = rowners_bs[dest+1]-rowners_bs[dest];
2539       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2540     }
2541   }
2542 
2543   /* receive values */
2544   if (rank) {
2545     rvalues = work;
2546     count   = rowners_bs[rank+1]-rowners_bs[rank];
2547     for (source=0; source<rank; source++) {
2548       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2549       /* process values */
2550       for (i=0; i<count; i++) {
2551         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2552       }
2553     }
2554   }
2555 
2556   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2557   ierr = PetscFree(work);CHKERRQ(ierr);
2558   PetscFunctionReturn(0);
2559 }
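/*
   Usage sketch for the routine above (A an assembled MATMPISBAIJ matrix,
   not part of this file): the user-facing entry point is MatGetRowMaxAbs(),
   and the idx argument must be NULL here, per the check at the top.

     Vec rmax;
     MatCreateVecs(A,NULL,&rmax);
     MatGetRowMaxAbs(A,rmax,NULL);
     VecDestroy(&rmax);
*/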
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatSOR_MPISBAIJ"
2563 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2564 {
2565   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2566   PetscErrorCode    ierr;
2567   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2568   PetscScalar       *x,*ptr,*from;
2569   Vec               bb1;
2570   const PetscScalar *b;
2571 
2572   PetscFunctionBegin;
2573   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2574   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2575 
2576   if (flag == SOR_APPLY_UPPER) {
2577     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2578     PetscFunctionReturn(0);
2579   }
2580 
2581   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2582     if (flag & SOR_ZERO_INITIAL_GUESS) {
2583       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2584       its--;
2585     }
2586 
2587     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2588     while (its--) {
2589 
2590       /* lower triangular part: slvec0b = - B^T*xx */
2591       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2592 
2593       /* copy xx into slvec0a */
2594       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2595       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2596       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2597       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2598 
2599       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2600 
2601       /* copy bb into slvec1a */
2602       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2603       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2604       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2605       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2606 
2607       /* set slvec1b = 0 */
2608       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2609 
2610       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2611       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2612       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2613       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2614 
2615       /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - B^T*xx - B*xx (off-diagonal contributions) */
2616       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2617 
2618       /* local diagonal sweep */
2619       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2620     }
2621     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2622   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2623     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2624   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2625     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2626   } else if (flag & SOR_EISENSTAT) {
2627     Vec               xx1;
2628     PetscBool         hasop;
2629     const PetscScalar *diag;
2630     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2631     PetscInt          i,n;
2632 
2633     if (!mat->xx1) {
2634       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2635       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2636     }
2637     xx1 = mat->xx1;
2638     bb1 = mat->bb1;
2639 
2640     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2641 
2642     if (!mat->diag) {
2643       /* this is wrong for same matrix with new nonzero values */
2644       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2645       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2646     }
2647     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2648 
2649     if (hasop) {
2650       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr); /* slvec1a = D*xx */
2651       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);              /* slvec1a = bb + scale*D*xx */
2652     } else {
2653       /*
2654           The loop below replaces these two lines with code that may be a bit faster with a good compiler:
2655       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2656       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2657       */
2658       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2659       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2660       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2661       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2662       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2663       if (omega == 1.0) {
2664         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2665         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2666       } else {
2667         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2668         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2669       }
2670       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2671       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2672       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2673       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2674     }
2675 
2676     /* multiply off-diagonal portion of matrix */
2677     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2678     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2679     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2680     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2681     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2682     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2683     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2684     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2685     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2686     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2687 
2688     /* local sweep */
2689     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2690     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2691   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2692   PetscFunctionReturn(0);
2693 }
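/*
   This kernel is normally reached through PCSOR; a configuration sketch
   (ksp and A assumed to exist elsewhere; bs must be 1, per the check above):

     PC pc;
     KSPSetOperators(ksp,A,A);
     KSPGetPC(ksp,&pc);
     PCSetType(pc,PCSOR);
     PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);
*/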
2694 
2695 #undef __FUNCT__
2696 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2697 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2698 {
2699   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2700   PetscErrorCode ierr;
2701   Vec            lvec1,bb1;
2702 
2703   PetscFunctionBegin;
2704   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2705   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2706 
2707   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2708     if (flag & SOR_ZERO_INITIAL_GUESS) {
2709       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2710       its--;
2711     }
2712 
2713     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2714     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2715     while (its--) {
2716       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717 
2718       /* lower triangular part: bb1 = bb - B^T*xx */
2719       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2720       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2721 
2722       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2723       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2724       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2725 
2726       /* upper triangular part: bb1 = bb1 - B*x */
2727       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2728       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2729 
2730       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2731 
2732       /* diagonal sweep */
2733       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2734     }
2735     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2736     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2737   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2738   PetscFunctionReturn(0);
2739 }
2740 
2741 #undef __FUNCT__
2742 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2743 /*@
2744      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2745          in standard CSR format.
2746 
2747    Collective on MPI_Comm
2748 
2749    Input Parameters:
2750 +  comm - MPI communicator
2751 .  bs - the block size, only a block size of 1 is supported
2752 .  m - number of local rows (Cannot be PETSC_DECIDE)
2753 .  n - number of local columns; this should be the same as the local size used in creating the
2754        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2755        calculated if N is given). For square matrices n is almost always m.
2756 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2757 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2758 .  i - row indices
2759 .  j - column indices
2760 -  a - matrix values
2761 
2762    Output Parameter:
2763 .   mat - the matrix
2764 
2765    Level: intermediate
2766 
2767    Notes:
2768        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2769      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2770      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2771 
2772        The i and j indices are 0-based; the i array gives the offsets into the local j array at which each row starts.
2773 
2774 .keywords: matrix, sbaij, compressed row, sparse, parallel
2775 
2776 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2777           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2778 @*/
2779 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2780 {
2781   PetscErrorCode ierr;
2782 
2783 
2784   PetscFunctionBegin;
2785   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2786   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2787   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2788   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2789   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2790   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2791   PetscFunctionReturn(0);
2792 }
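/*
   A minimal sketch (bs = 1, one process): build the tridiagonal matrix

        [ 2 1 0 ]
        [ 1 2 1 ]
        [ 0 1 2 ]

   passing only the upper triangular part of each row in CSR form, matching
   SBAIJ's symmetric storage.

     PetscInt    ia[] = {0,2,4,5};
     PetscInt    ja[] = {0,1,1,2,2};
     PetscScalar va[] = {2.0,1.0,2.0,1.0,2.0};
     Mat         A;
     MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);
     MatDestroy(&A);
*/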
2793 
2794 
2795 #undef __FUNCT__
2796 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2797 /*@C
2798    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format.
2799
2800 
2801    Collective on MPI_Comm
2802 
2803    Input Parameters:
2804 +  B - the matrix
2805 .  bs - the block size
2806 .  i - the indices into j for the start of each local row (starts with zero)
2807 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2808 -  v - optional values in the matrix
2809 
2810    Level: developer
2811 
2812 .keywords: matrix, sbaij, compressed row, sparse, parallel
2813 
2814 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2815 @*/
2816 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2817 {
2818   PetscErrorCode ierr;
2819 
2820   PetscFunctionBegin;
2821   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2822   PetscFunctionReturn(0);
2823 }
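/*
   Sketch of calling this routine directly (comm, m, n, M, N, bs, i, j, v are
   placeholders); this is exactly what MatCreateMPISBAIJWithArrays() above does:

     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,M,N);
     MatSetType(B,MATMPISBAIJ);
     MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);
*/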
2824 
2825 #undef __FUNCT__
2826 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
2827 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2828 {
2829   PetscErrorCode ierr;
2830   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2831   PetscInt       *indx;
2832   PetscScalar    *values;
2833 
2834   PetscFunctionBegin;
2835   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2836   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2837     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2838     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
2839     PetscInt       *bindx,rmax=a->rmax,j;
2840 
2841     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2842     mbs = m/bs; Nbs = N/cbs;
2843     if (n == PETSC_DECIDE) {
2844       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2845     }
2846     /* Check sum(n) = Nbs */
2847     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2848     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);
2849 
2850     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2851     rstart -= mbs;
2852 
2853     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2854     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
2855     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2856     for (i=0; i<mbs; i++) {
2857       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2858       nnz = nnz/bs;
2859       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2860       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2861       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2862     }
2863     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2864     ierr = PetscFree(bindx);CHKERRQ(ierr);
2865 
2866     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2867     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2868     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2869     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2870     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2871     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2872   }
2873 
2874   /* numeric phase */
2875   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2876   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2877 
2878   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2879   for (i=0; i<m; i++) {
2880     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2881     Ii   = i + rstart;
2882     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2883     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2884   }
2885   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2886   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2887   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2888   PetscFunctionReturn(0);
2889 }
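/*
   This routine is the MPISBAIJ back end of MatCreateMPIMatConcatenateSeqMat();
   a sketch of the public entry point (inmat a sequential SBAIJ matrix defined
   elsewhere, local column count chosen by PETSc):

     Mat C;
     MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
*/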
2890