xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision ee900cc92187a7889ca4f268c65f36fb91826ff2)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 #if defined(PETSC_HAVE_ELEMENTAL)
23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
24 #endif
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
28 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
29 {
30   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
31   PetscErrorCode ierr;
32 
33   PetscFunctionBegin;
34   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
35   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
36   PetscFunctionReturn(0);
37 }
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
53   { \
54  \
55     brow = row/bs;  \
56     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
57     rmax = aimax[brow]; nrow = ailen[brow]; \
58     bcol = col/bs; \
59     ridx = row % bs; cidx = col % bs; \
60     low  = 0; high = nrow; \
61     while (high-low > 3) { \
62       t = (low+high)/2; \
63       if (rp[t] > bcol) high = t; \
64       else              low  = t; \
65     } \
66     for (_i=low; _i<high; _i++) { \
67       if (rp[_i] > bcol) break; \
68       if (rp[_i] == bcol) { \
69         bap = ap + bs2*_i + bs*cidx + ridx; \
70         if (addv == ADD_VALUES) *bap += value;  \
71         else                    *bap  = value;  \
72         goto a_noinsert; \
73       } \
74     } \
75     if (a->nonew == 1) goto a_noinsert; \
76     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
77     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
78     N = nrow++ - 1;  \
79     /* shift up all the later entries in this row */ \
80     for (ii=N; ii>=_i; ii--) { \
81       rp[ii+1] = rp[ii]; \
82       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
83     } \
84     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
85     rp[_i]                      = bcol;  \
86     ap[bs2*_i + bs*cidx + ridx] = value;  \
87     A->nonzerostate++;\
88 a_noinsert:; \
89     ailen[brow] = nrow; \
90   }
91 
92 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
93   { \
94     brow = row/bs;  \
95     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
96     rmax = bimax[brow]; nrow = bilen[brow]; \
97     bcol = col/bs; \
98     ridx = row % bs; cidx = col % bs; \
99     low  = 0; high = nrow; \
100     while (high-low > 3) { \
101       t = (low+high)/2; \
102       if (rp[t] > bcol) high = t; \
103       else              low  = t; \
104     } \
105     for (_i=low; _i<high; _i++) { \
106       if (rp[_i] > bcol) break; \
107       if (rp[_i] == bcol) { \
108         bap = ap + bs2*_i + bs*cidx + ridx; \
109         if (addv == ADD_VALUES) *bap += value;  \
110         else                    *bap  = value;  \
111         goto b_noinsert; \
112       } \
113     } \
114     if (b->nonew == 1) goto b_noinsert; \
115     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
116     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
117     N = nrow++ - 1;  \
118     /* shift up all the later entries in this row */ \
119     for (ii=N; ii>=_i; ii--) { \
120       rp[ii+1] = rp[ii]; \
121       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122     } \
123     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
124     rp[_i]                      = bcol;  \
125     ap[bs2*_i + bs*cidx + ridx] = value;  \
126     B->nonzerostate++;\
127 b_noinsert:; \
128     bilen[brow] = nrow; \
129   }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat          A     = baij->A;
149   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar    *aa   =a->a;
152 
153   Mat         B     = baij->B;
154   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar   *ba   =b->a;
157 
158   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar *ap,*bap;
161 
162   /* for stash */
163   PetscInt  n_loc, *in_loc = NULL;
164   MatScalar *v_loc = NULL;
165 
166   PetscFunctionBegin;
167   if (!baij->donotstash) {
168     if (n > baij->n_loc) {
169       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
170       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
171       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
173 
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
192         }
193         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
194           col  = in[j] - cstart_orig;         /* local col index */
195           brow = row/bs; bcol = col/bs;
196           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
197           if (roworiented) value = v[i*n+j];
198           else             value = v[i+j*m];
199           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
200           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
201         } else if (in[j] < 0) continue;
202 #if defined(PETSC_USE_DEBUG)
203         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
204 #endif
205         else {  /* off-diag entry (B) */
206           if (mat->was_assembled) {
207             if (!baij->colmap) {
208               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
209             }
210 #if defined(PETSC_USE_CTABLE)
211             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
212             col  = col - 1;
213 #else
214             col = baij->colmap[in[j]/bs] - 1;
215 #endif
216             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
217               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
218               col  =  in[j];
219               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
220               B    = baij->B;
221               b    = (Mat_SeqBAIJ*)(B)->data;
222               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
223               ba   = b->a;
224             } else col += in[j]%bs;
225           } else col = in[j];
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc          = 0;
237         for (j=0; j<n; j++) {
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray     =baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if (!barray) {
269     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If NumCol = 1 then a copy is not required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if ((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++ = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend) {
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         } else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined(PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined(PETSC_USE_CTABLE)
333             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col  = in[j];
341             }
342           } else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
348       if (!baij->donotstash) {
349         if (roworiented) {
350           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         } else {
352           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         }
354       }
355     }
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 #undef __FUNCT__
361 #define __FUNCT__ "MatGetValues_MPISBAIJ"
362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
363 {
364   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
365   PetscErrorCode ierr;
366   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
367   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
368 
369   PetscFunctionBegin;
370   for (i=0; i<m; i++) {
371     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
372     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
373     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
374       row = idxm[i] - bsrstart;
375       for (j=0; j<n; j++) {
376         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
377         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
378         if (idxn[j] >= bscstart && idxn[j] < bscend) {
379           col  = idxn[j] - bscstart;
380           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
381         } else {
382           if (!baij->colmap) {
383             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
384           }
385 #if defined(PETSC_USE_CTABLE)
386           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
387           data--;
388 #else
389           data = baij->colmap[idxn[j]/bs]-1;
390 #endif
391           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
392           else {
393             col  = data + idxn[j]%bs;
394             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
395           }
396         }
397       }
398     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
399   }
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNCT__
404 #define __FUNCT__ "MatNorm_MPISBAIJ"
405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
406 {
407   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscReal      sum[2],*lnorm2;
410 
411   PetscFunctionBegin;
412   if (baij->size == 1) {
413     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
414   } else {
415     if (type == NORM_FROBENIUS) {
416       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
417       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
418       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
419       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
420       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
421       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
422       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
423       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
424     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
425       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
426       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
427       PetscReal    *rsum,*rsum2,vabs;
428       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
429       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
430       MatScalar    *v;
431 
432       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
433       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
434       /* Amat */
435       v = amat->a; jj = amat->j;
436       for (brow=0; brow<mbs; brow++) {
437         grow = bs*(rstart + brow);
438         nz   = amat->i[brow+1] - amat->i[brow];
439         for (bcol=0; bcol<nz; bcol++) {
440           gcol = bs*(rstart + *jj); jj++;
441           for (col=0; col<bs; col++) {
442             for (row=0; row<bs; row++) {
443               vabs            = PetscAbsScalar(*v); v++;
444               rsum[gcol+col] += vabs;
445               /* non-diagonal block */
446               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       /* Bmat */
452       v = bmat->a; jj = bmat->j;
453       for (brow=0; brow<mbs; brow++) {
454         grow = bs*(rstart + brow);
455         nz = bmat->i[brow+1] - bmat->i[brow];
456         for (bcol=0; bcol<nz; bcol++) {
457           gcol = bs*garray[*jj]; jj++;
458           for (col=0; col<bs; col++) {
459             for (row=0; row<bs; row++) {
460               vabs            = PetscAbsScalar(*v); v++;
461               rsum[gcol+col] += vabs;
462               rsum[grow+row] += vabs;
463             }
464           }
465         }
466       }
467       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
468       *norm = 0.0;
469       for (col=0; col<mat->cmap->N; col++) {
470         if (rsum2[col] > *norm) *norm = rsum2[col];
471       }
472       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
473     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
474   }
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
483   PetscErrorCode ierr;
484   PetscInt       nstash,reallocs;
485   InsertMode     addv;
486 
487   PetscFunctionBegin;
488   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
489 
490   /* make sure all processors are either in INSERTMODE or ADDMODE */
491   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
492   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
493   mat->insertmode = addv; /* in case this processor had no cache */
494 
495   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
496   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
497   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
498   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
499   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
500   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
507 {
508   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
509   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
510   PetscErrorCode ierr;
511   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
512   PetscInt       *row,*col;
513   PetscBool      other_disassembled;
514   PetscMPIInt    n;
515   PetscBool      r1,r2,r3;
516   MatScalar      *val;
517   InsertMode     addv = mat->insertmode;
518 
519   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
520   PetscFunctionBegin;
521   if (!baij->donotstash &&  !mat->nooffprocentries) {
522     while (1) {
523       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
524       if (!flg) break;
525 
526       for (i=0; i<n;) {
527         /* Now identify the consecutive vals belonging to the same row */
528         for (j=i,rstart=row[j]; j<n; j++) {
529           if (row[j] != rstart) break;
530         }
531         if (j < n) ncols = j-i;
532         else       ncols = n-i;
533         /* Now assemble all these values with a single function call */
534         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
535         i    = j;
536       }
537     }
538     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
539     /* Now process the block-stash. Since the values are stashed column-oriented,
540        set the roworiented flag to column oriented, and after MatSetValues()
541        restore the original flags */
542     r1 = baij->roworiented;
543     r2 = a->roworiented;
544     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
545 
546     baij->roworiented = PETSC_FALSE;
547     a->roworiented    = PETSC_FALSE;
548 
549     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
550     while (1) {
551       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
552       if (!flg) break;
553 
554       for (i=0; i<n;) {
555         /* Now identify the consecutive vals belonging to the same row */
556         for (j=i,rstart=row[j]; j<n; j++) {
557           if (row[j] != rstart) break;
558         }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i    = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566 
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569 
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled, if so we must
577      also disassemble ourselfs, in order that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
596 
597   baij->rowvalues = 0;
598 
599   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
600   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
601     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
602     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
609 #include <petscdraw.h>
610 #undef __FUNCT__
611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
613 {
614   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
615   PetscErrorCode    ierr;
616   PetscInt          bs   = mat->rmap->bs;
617   PetscMPIInt       rank = baij->rank;
618   PetscBool         iascii,isdraw;
619   PetscViewer       sviewer;
620   PetscViewerFormat format;
621 
622   PetscFunctionBegin;
623   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
624   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
625   if (iascii) {
626     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
627     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
628       MatInfo info;
629       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
630       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
633       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
634       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
635       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
636       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
637       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
638       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
640       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_INFO) {
643       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
644       PetscFunctionReturn(0);
645     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
646       PetscFunctionReturn(0);
647     }
648   }
649 
650   if (isdraw) {
651     PetscDraw draw;
652     PetscBool isnull;
653     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
654     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
655   }
656 
657   {
658     /* assemble the entire matrix onto first processor. */
659     Mat          A;
660     Mat_SeqSBAIJ *Aloc;
661     Mat_SeqBAIJ  *Bloc;
662     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
663     MatScalar    *a;
664     const char   *matname;
665 
666     /* Should this be the same type as mat? */
667     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
668     if (!rank) {
669       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
670     } else {
671       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
672     }
673     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
674     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
675     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
676     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
677 
678     /* copy over the A part */
679     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
680     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
681     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
682 
683     for (i=0; i<mbs; i++) {
684       rvals[0] = bs*(baij->rstartbs + i);
685       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
686       for (j=ai[i]; j<ai[i+1]; j++) {
687         col = (baij->cstartbs+aj[j])*bs;
688         for (k=0; k<bs; k++) {
689           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
690           col++;
691           a += bs;
692         }
693       }
694     }
695     /* copy over the B part */
696     Bloc = (Mat_SeqBAIJ*)baij->B->data;
697     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
698     for (i=0; i<mbs; i++) {
699 
700       rvals[0] = bs*(baij->rstartbs + i);
701       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
702       for (j=ai[i]; j<ai[i+1]; j++) {
703         col = baij->garray[aj[j]]*bs;
704         for (k=0; k<bs; k++) {
705           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
706           col++;
707           a += bs;
708         }
709       }
710     }
711     ierr = PetscFree(rvals);CHKERRQ(ierr);
712     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
713     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
714     /*
715        Everyone has to call to draw the matrix since the graphics waits are
716        synchronized across all processors that share the PetscDraw object
717     */
718     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
719     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
722       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
723     }
724     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     ierr = MatDestroy(&A);CHKERRQ(ierr);
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatView_MPISBAIJ"
732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
733 {
734   PetscErrorCode ierr;
735   PetscBool      iascii,isdraw,issocket,isbinary;
736 
737   PetscFunctionBegin;
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
740   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
741   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
742   if (iascii || isdraw || issocket || isbinary) {
743     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatDestroy_MPISBAIJ"
750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
751 {
752   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
753   PetscErrorCode ierr;
754 
755   PetscFunctionBegin;
756 #if defined(PETSC_USE_LOG)
757   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
758 #endif
759   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
760   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
761   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
762   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
763 #if defined(PETSC_USE_CTABLE)
764   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
765 #else
766   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
767 #endif
768   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
770   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
776   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
777   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
778   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
779   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
781   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
782   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
783 #if defined(PETSC_USE_REAL_MAT_SINGLE)
784   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
785 #endif
786   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
788   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
789   ierr = PetscFree(mat->data);CHKERRQ(ierr);
790 
791   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
795   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
796   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
797 #if defined(PETSC_HAVE_ELEMENTAL)
798   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
799 #endif
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
806 {
807   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
808   PetscErrorCode ierr;
809   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
810   PetscScalar    *x,*from;
811 
812   PetscFunctionBegin;
813   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
814   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
815 
816   /* diagonal part */
817   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
818   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
819 
820   /* subdiagonal part */
821   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
822 
823   /* copy x into the vec slvec0 */
824   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
826 
827   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
828   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
829   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
830 
831   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833   /* supperdiagonal part */
834   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMult_MPISBAIJ"
840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
841 {
842   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
843   PetscErrorCode ierr;
844   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
845   PetscScalar    *x,*from;
846 
847   PetscFunctionBegin;
848   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
849   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
850 
851   /* diagonal part */
852   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
853   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
854 
855   /* subdiagonal part */
856   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
857 
858   /* copy x into the vec slvec0 */
859   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
860   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
861 
862   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
863   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
864   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
865 
866   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   /* supperdiagonal part */
869   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
875 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
876 {
877   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
878   PetscErrorCode ierr;
879   PetscInt       nt;
880 
881   PetscFunctionBegin;
882   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
883   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
884 
885   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
886   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
887 
888   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889   /* do diagonal part */
890   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
891   /* do supperdiagonal part */
892   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
894   /* do subdiagonal part */
895   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
896   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
897   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
903 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
904 {
905   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
906   PetscErrorCode ierr;
907   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
908   PetscScalar    *x,*from,zero=0.0;
909 
910   PetscFunctionBegin;
911   /*
912   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
913   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
914   */
915   /* diagonal part */
916   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
917   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
918 
919   /* subdiagonal part */
920   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
921 
922   /* copy x into the vec slvec0 */
923   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
924   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
925   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
926   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
927 
928   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
929   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
930   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931 
932   /* supperdiagonal part */
933   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
934   PetscFunctionReturn(0);
935 }
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
939 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
940 {
941   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
942   PetscErrorCode ierr;
943 
944   PetscFunctionBegin;
945   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   /* do diagonal part */
947   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
948   /* do supperdiagonal part */
949   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
951 
952   /* do subdiagonal part */
953   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
954   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
955   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
956   PetscFunctionReturn(0);
957 }
958 
959 /*
960   This only works correctly for square matrices where the subblock A->A is the
961    diagonal block
962 */
963 #undef __FUNCT__
964 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
965 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
966 {
967   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
968   PetscErrorCode ierr;
969 
970   PetscFunctionBegin;
971   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
972   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
973   PetscFunctionReturn(0);
974 }
975 
976 #undef __FUNCT__
977 #define __FUNCT__ "MatScale_MPISBAIJ"
978 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
979 {
980   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
981   PetscErrorCode ierr;
982 
983   PetscFunctionBegin;
984   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
985   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatGetRow_MPISBAIJ"
991 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
992 {
993   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
994   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
995   PetscErrorCode ierr;
996   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
997   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
998   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
999 
1000   PetscFunctionBegin;
1001   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1002   mat->getrowactive = PETSC_TRUE;
1003 
1004   if (!mat->rowvalues && (idx || v)) {
1005     /*
1006         allocate enough space to hold information from the longest row.
1007     */
1008     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1009     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1010     PetscInt     max = 1,mbs = mat->mbs,tmp;
1011     for (i=0; i<mbs; i++) {
1012       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1013       if (max < tmp) max = tmp;
1014     }
1015     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1016   }
1017 
1018   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1019   lrow = row - brstart;  /* local row index */
1020 
1021   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1022   if (!v)   {pvA = 0; pvB = 0;}
1023   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1024   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1025   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1026   nztot = nzA + nzB;
1027 
1028   cmap = mat->garray;
1029   if (v  || idx) {
1030     if (nztot) {
1031       /* Sort by increasing column numbers, assuming A and B already sorted */
1032       PetscInt imark = -1;
1033       if (v) {
1034         *v = v_p = mat->rowvalues;
1035         for (i=0; i<nzB; i++) {
1036           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1037           else break;
1038         }
1039         imark = i;
1040         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1041         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1042       }
1043       if (idx) {
1044         *idx = idx_p = mat->rowindices;
1045         if (imark > -1) {
1046           for (i=0; i<imark; i++) {
1047             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1048           }
1049         } else {
1050           for (i=0; i<nzB; i++) {
1051             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1052             else break;
1053           }
1054           imark = i;
1055         }
1056         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1057         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1058       }
1059     } else {
1060       if (idx) *idx = 0;
1061       if (v)   *v   = 0;
1062     }
1063   }
1064   *nz  = nztot;
1065   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1066   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1072 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1073 {
1074   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1075 
1076   PetscFunctionBegin;
1077   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1078   baij->getrowactive = PETSC_FALSE;
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1084 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1085 {
1086   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1087   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1088 
1089   PetscFunctionBegin;
1090   aA->getrow_utriangular = PETSC_TRUE;
1091   PetscFunctionReturn(0);
1092 }
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1095 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1096 {
1097   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1098   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1099 
1100   PetscFunctionBegin;
1101   aA->getrow_utriangular = PETSC_FALSE;
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1107 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1108 {
1109   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1114   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1120 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1121 {
1122   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1127   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1133 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1134 {
1135   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1140   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1146 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1147 {
1148   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1149   Mat            A  = a->A,B = a->B;
1150   PetscErrorCode ierr;
1151   PetscReal      isend[5],irecv[5];
1152 
1153   PetscFunctionBegin;
1154   info->block_size = (PetscReal)matin->rmap->bs;
1155 
1156   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1157 
1158   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1159   isend[3] = info->memory;  isend[4] = info->mallocs;
1160 
1161   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1162 
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1173 
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else if (flag == MAT_GLOBAL_SUM) {
1180     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1181 
1182     info->nz_used      = irecv[0];
1183     info->nz_allocated = irecv[1];
1184     info->nz_unneeded  = irecv[2];
1185     info->memory       = irecv[3];
1186     info->mallocs      = irecv[4];
1187   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1188   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1189   info->fill_ratio_needed = 0;
1190   info->factor_mallocs    = 0;
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1196 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1197 {
1198   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1199   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NEW_NONZERO_LOCATIONS:
1205   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1206   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1207   case MAT_KEEP_NONZERO_PATTERN:
1208   case MAT_NEW_NONZERO_LOCATION_ERR:
1209     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1211     break;
1212   case MAT_ROW_ORIENTED:
1213     a->roworiented = flg;
1214 
1215     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1216     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1217     break;
1218   case MAT_NEW_DIAGONALS:
1219     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1220     break;
1221   case MAT_IGNORE_OFF_PROC_ENTRIES:
1222     a->donotstash = flg;
1223     break;
1224   case MAT_USE_HASH_TABLE:
1225     a->ht_flag = flg;
1226     break;
1227   case MAT_HERMITIAN:
1228     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1229     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1230 
1231     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1232     break;
1233   case MAT_SPD:
1234     A->spd_set = PETSC_TRUE;
1235     A->spd     = flg;
1236     if (flg) {
1237       A->symmetric                  = PETSC_TRUE;
1238       A->structurally_symmetric     = PETSC_TRUE;
1239       A->symmetric_set              = PETSC_TRUE;
1240       A->structurally_symmetric_set = PETSC_TRUE;
1241     }
1242     break;
1243   case MAT_SYMMETRIC:
1244     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1245     break;
1246   case MAT_STRUCTURALLY_SYMMETRIC:
1247     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1248     break;
1249   case MAT_SYMMETRY_ETERNAL:
1250     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1251     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1252     break;
1253   case MAT_IGNORE_LOWER_TRIANGULAR:
1254     aA->ignore_ltriangular = flg;
1255     break;
1256   case MAT_ERROR_LOWER_TRIANGULAR:
1257     aA->ignore_ltriangular = flg;
1258     break;
1259   case MAT_GETROW_UPPERTRIANGULAR:
1260     aA->getrow_utriangular = flg;
1261     break;
1262   default:
1263     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1264   }
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1270 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1271 {
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275   if (MAT_INITIAL_MATRIX || *B != A) {
1276     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1277   }
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1283 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1284 {
1285   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1286   Mat            a     = baij->A, b=baij->B;
1287   PetscErrorCode ierr;
1288   PetscInt       nv,m,n;
1289   PetscBool      flg;
1290 
1291   PetscFunctionBegin;
1292   if (ll != rr) {
1293     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1294     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1295   }
1296   if (!ll) PetscFunctionReturn(0);
1297 
1298   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1299   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1300 
1301   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1302   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1303 
1304   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305 
1306   /* left diagonalscale the off-diagonal part */
1307   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1308 
1309   /* scale the diagonal part */
1310   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1311 
1312   /* right diagonalscale the off-diagonal part */
1313   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1314   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1320 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1321 {
1322   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatEqual_MPISBAIJ"
1334 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1335 {
1336   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1337   Mat            a,b,c,d;
1338   PetscBool      flg;
1339   PetscErrorCode ierr;
1340 
1341   PetscFunctionBegin;
1342   a = matA->A; b = matA->B;
1343   c = matB->A; d = matB->B;
1344 
1345   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1346   if (flg) {
1347     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1348   }
1349   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatCopy_MPISBAIJ"
1355 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1356 {
1357   PetscErrorCode ierr;
1358   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1359   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1360 
1361   PetscFunctionBegin;
1362   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1363   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1364     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1365     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1366     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1367   } else {
1368     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1369     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1376 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1377 {
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1387 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1388 {
1389   PetscErrorCode ierr;
1390   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1391   PetscBLASInt   bnz,one=1;
1392   Mat_SeqSBAIJ   *xa,*ya;
1393   Mat_SeqBAIJ    *xb,*yb;
1394 
1395   PetscFunctionBegin;
1396   if (str == SAME_NONZERO_PATTERN) {
1397     PetscScalar alpha = a;
1398     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1399     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1400     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1401     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1402     xb   = (Mat_SeqBAIJ*)xx->B->data;
1403     yb   = (Mat_SeqBAIJ*)yy->B->data;
1404     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1405     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1406     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1407   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1408     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1409     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1410     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1411   } else {
1412     Mat      B;
1413     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1414     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1415     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1416     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1417     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1418     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1419     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1420     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1421     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1422     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1423     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1424     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1425     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1426     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1427     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1428     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1429     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1430     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1431     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1432     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1433   }
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1439 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1440 {
1441   PetscErrorCode ierr;
1442   PetscInt       i;
1443   PetscBool      flg;
1444 
1445   PetscFunctionBegin;
1446   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1447   for (i=0; i<n; i++) {
1448     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1449     if (!flg) { /* *B[i] is non-symmetric, set flag */
1450       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1451     }
1452   }
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 /* -------------------------------------------------------------------*/
1457 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1458                                        MatGetRow_MPISBAIJ,
1459                                        MatRestoreRow_MPISBAIJ,
1460                                        MatMult_MPISBAIJ,
1461                                /*  4*/ MatMultAdd_MPISBAIJ,
1462                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1463                                        MatMultAdd_MPISBAIJ,
1464                                        0,
1465                                        0,
1466                                        0,
1467                                /* 10*/ 0,
1468                                        0,
1469                                        0,
1470                                        MatSOR_MPISBAIJ,
1471                                        MatTranspose_MPISBAIJ,
1472                                /* 15*/ MatGetInfo_MPISBAIJ,
1473                                        MatEqual_MPISBAIJ,
1474                                        MatGetDiagonal_MPISBAIJ,
1475                                        MatDiagonalScale_MPISBAIJ,
1476                                        MatNorm_MPISBAIJ,
1477                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1478                                        MatAssemblyEnd_MPISBAIJ,
1479                                        MatSetOption_MPISBAIJ,
1480                                        MatZeroEntries_MPISBAIJ,
1481                                /* 24*/ 0,
1482                                        0,
1483                                        0,
1484                                        0,
1485                                        0,
1486                                /* 29*/ MatSetUp_MPISBAIJ,
1487                                        0,
1488                                        0,
1489                                        0,
1490                                        0,
1491                                /* 34*/ MatDuplicate_MPISBAIJ,
1492                                        0,
1493                                        0,
1494                                        0,
1495                                        0,
1496                                /* 39*/ MatAXPY_MPISBAIJ,
1497                                        MatGetSubMatrices_MPISBAIJ,
1498                                        MatIncreaseOverlap_MPISBAIJ,
1499                                        MatGetValues_MPISBAIJ,
1500                                        MatCopy_MPISBAIJ,
1501                                /* 44*/ 0,
1502                                        MatScale_MPISBAIJ,
1503                                        0,
1504                                        0,
1505                                        0,
1506                                /* 49*/ 0,
1507                                        0,
1508                                        0,
1509                                        0,
1510                                        0,
1511                                /* 54*/ 0,
1512                                        0,
1513                                        MatSetUnfactored_MPISBAIJ,
1514                                        0,
1515                                        MatSetValuesBlocked_MPISBAIJ,
1516                                /* 59*/ 0,
1517                                        0,
1518                                        0,
1519                                        0,
1520                                        0,
1521                                /* 64*/ 0,
1522                                        0,
1523                                        0,
1524                                        0,
1525                                        0,
1526                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1527                                        0,
1528                                        0,
1529                                        0,
1530                                        0,
1531                                /* 74*/ 0,
1532                                        0,
1533                                        0,
1534                                        0,
1535                                        0,
1536                                /* 79*/ 0,
1537                                        0,
1538                                        0,
1539                                        0,
1540                                        MatLoad_MPISBAIJ,
1541                                /* 84*/ 0,
1542                                        0,
1543                                        0,
1544                                        0,
1545                                        0,
1546                                /* 89*/ 0,
1547                                        0,
1548                                        0,
1549                                        0,
1550                                        0,
1551                                /* 94*/ 0,
1552                                        0,
1553                                        0,
1554                                        0,
1555                                        0,
1556                                /* 99*/ 0,
1557                                        0,
1558                                        0,
1559                                        0,
1560                                        0,
1561                                /*104*/ 0,
1562                                        MatRealPart_MPISBAIJ,
1563                                        MatImaginaryPart_MPISBAIJ,
1564                                        MatGetRowUpperTriangular_MPISBAIJ,
1565                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1566                                /*109*/ 0,
1567                                        0,
1568                                        0,
1569                                        0,
1570                                        0,
1571                                /*114*/ 0,
1572                                        0,
1573                                        0,
1574                                        0,
1575                                        0,
1576                                /*119*/ 0,
1577                                        0,
1578                                        0,
1579                                        0,
1580                                        0,
1581                                /*124*/ 0,
1582                                        0,
1583                                        0,
1584                                        0,
1585                                        0,
1586                                /*129*/ 0,
1587                                        0,
1588                                        0,
1589                                        0,
1590                                        0,
1591                                /*134*/ 0,
1592                                        0,
1593                                        0,
1594                                        0,
1595                                        0,
1596                                /*139*/ 0,
1597                                        0,
1598                                        0,
1599                                        0,
1600                                        0,
1601                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1602 };
1603 
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1606 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1607 {
1608   PetscFunctionBegin;
1609   *a = ((Mat_MPISBAIJ*)A->data)->A;
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1615 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1616 {
1617   Mat_MPISBAIJ   *b;
1618   PetscErrorCode ierr;
1619   PetscInt       i,mbs,Mbs;
1620 
1621   PetscFunctionBegin;
1622   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1623   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1624   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1625   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1626 
1627   b   = (Mat_MPISBAIJ*)B->data;
1628   mbs = B->rmap->n/bs;
1629   Mbs = B->rmap->N/bs;
1630   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1631 
1632   B->rmap->bs = bs;
1633   b->bs2      = bs*bs;
1634   b->mbs      = mbs;
1635   b->Mbs      = Mbs;
1636   b->nbs      = B->cmap->n/bs;
1637   b->Nbs      = B->cmap->N/bs;
1638 
1639   for (i=0; i<=b->size; i++) {
1640     b->rangebs[i] = B->rmap->range[i]/bs;
1641   }
1642   b->rstartbs = B->rmap->rstart/bs;
1643   b->rendbs   = B->rmap->rend/bs;
1644 
1645   b->cstartbs = B->cmap->rstart/bs;
1646   b->cendbs   = B->cmap->rend/bs;
1647 
1648   if (!B->preallocated) {
1649     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1650     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1651     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1652     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1653     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1654     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1655     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1656     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1657     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1658   }
1659 
1660   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1661   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1662 
1663   B->preallocated = PETSC_TRUE;
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1669 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1670 {
1671   PetscInt       m,rstart,cstart,cend;
1672   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1673   const PetscInt *JJ    =0;
1674   PetscScalar    *values=0;
1675   PetscErrorCode ierr;
1676 
1677   PetscFunctionBegin;
1678   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1679   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1680   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1681   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1682   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1683   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1684   m      = B->rmap->n/bs;
1685   rstart = B->rmap->rstart/bs;
1686   cstart = B->cmap->rstart/bs;
1687   cend   = B->cmap->rend/bs;
1688 
1689   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1690   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1691   for (i=0; i<m; i++) {
1692     nz = ii[i+1] - ii[i];
1693     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1694     nz_max = PetscMax(nz_max,nz);
1695     JJ     = jj + ii[i];
1696     for (j=0; j<nz; j++) {
1697       if (*JJ >= cstart) break;
1698       JJ++;
1699     }
1700     d = 0;
1701     for (; j<nz; j++) {
1702       if (*JJ++ >= cend) break;
1703       d++;
1704     }
1705     d_nnz[i] = d;
1706     o_nnz[i] = nz - d;
1707   }
1708   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1709   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1710 
1711   values = (PetscScalar*)V;
1712   if (!values) {
1713     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1714     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1715   }
1716   for (i=0; i<m; i++) {
1717     PetscInt          row    = i + rstart;
1718     PetscInt          ncols  = ii[i+1] - ii[i];
1719     const PetscInt    *icols = jj + ii[i];
1720     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1721     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1722   }
1723 
1724   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1725   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1726   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1727   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 /*MC
1732    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1733    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1734    the matrix is stored.
1735 
1736   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1737   can call MatSetOption(Mat, MAT_HERMITIAN);
1738 
1739    Options Database Keys:
1740 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1741 
1742   Level: beginner
1743 
1744 .seealso: MatCreateMPISBAIJ
1745 M*/
1746 
1747 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1748 
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatCreate_MPISBAIJ"
1751 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1752 {
1753   Mat_MPISBAIJ   *b;
1754   PetscErrorCode ierr;
1755   PetscBool      flg = PETSC_FALSE;
1756 
1757   PetscFunctionBegin;
1758   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1759   B->data = (void*)b;
1760   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1761 
1762   B->ops->destroy = MatDestroy_MPISBAIJ;
1763   B->ops->view    = MatView_MPISBAIJ;
1764   B->assembled    = PETSC_FALSE;
1765   B->insertmode   = NOT_SET_VALUES;
1766 
1767   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1768   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1769 
1770   /* build local table of row and column ownerships */
1771   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1772 
1773   /* build cache for off array entries formed */
1774   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1775 
1776   b->donotstash  = PETSC_FALSE;
1777   b->colmap      = NULL;
1778   b->garray      = NULL;
1779   b->roworiented = PETSC_TRUE;
1780 
1781   /* stuff used in block assembly */
1782   b->barray = 0;
1783 
1784   /* stuff used for matrix vector multiply */
1785   b->lvec    = 0;
1786   b->Mvctx   = 0;
1787   b->slvec0  = 0;
1788   b->slvec0b = 0;
1789   b->slvec1  = 0;
1790   b->slvec1a = 0;
1791   b->slvec1b = 0;
1792   b->sMvctx  = 0;
1793 
1794   /* stuff for MatGetRow() */
1795   b->rowindices   = 0;
1796   b->rowvalues    = 0;
1797   b->getrowactive = PETSC_FALSE;
1798 
1799   /* hash table stuff */
1800   b->ht           = 0;
1801   b->hd           = 0;
1802   b->ht_size      = 0;
1803   b->ht_flag      = PETSC_FALSE;
1804   b->ht_fact      = 0;
1805   b->ht_total_ct  = 0;
1806   b->ht_insert_ct = 0;
1807 
1808   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1809   b->ijonly = PETSC_FALSE;
1810 
1811   b->in_loc = 0;
1812   b->v_loc  = 0;
1813   b->n_loc  = 0;
1814 
1815   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1816   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1817   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1819   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1820   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1821 #if defined(PETSC_HAVE_ELEMENTAL)
1822   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
1823 #endif
1824 
1825   B->symmetric                  = PETSC_TRUE;
1826   B->structurally_symmetric     = PETSC_TRUE;
1827   B->symmetric_set              = PETSC_TRUE;
1828   B->structurally_symmetric_set = PETSC_TRUE;
1829 
1830   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1831   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1832   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1833   if (flg) {
1834     PetscReal fact = 1.39;
1835     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1836     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1837     if (fact <= 1.0) fact = 1.39;
1838     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1839     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1840   }
1841   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 /*MC
1846    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1847 
1848    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1849    and MATMPISBAIJ otherwise.
1850 
1851    Options Database Keys:
1852 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1853 
1854   Level: beginner
1855 
1856 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1857 M*/
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1861 /*@C
1862    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1863    the user should preallocate the matrix storage by setting the parameters
1864    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1865    performance can be increased by more than a factor of 50.
1866 
1867    Collective on Mat
1868 
1869    Input Parameters:
1870 +  B - the matrix
1871 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1872           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1873 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1874            submatrix  (same for all local rows)
1875 .  d_nnz - array containing the number of block nonzeros in the various block rows
1876            in the upper triangular and diagonal part of the in diagonal portion of the local
1877            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1878            for the diagonal entry and set a value even if it is zero.
1879 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1880            submatrix (same for all local rows).
1881 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1882            off-diagonal portion of the local submatrix that is right of the diagonal
1883            (possibly different for each block row) or NULL.
1884 
1885 
1886    Options Database Keys:
1887 .   -mat_no_unroll - uses code that does not unroll the loops in the
1888                      block calculations (much slower)
1889 .   -mat_block_size - size of the blocks to use
1890 
1891    Notes:
1892 
1893    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1894    than it must be used on all processors that share the object for that argument.
1895 
1896    If the *_nnz parameter is given then the *_nz parameter is ignored
1897 
1898    Storage Information:
1899    For a square global matrix we define each processor's diagonal portion
1900    to be its local rows and the corresponding columns (a square submatrix);
1901    each processor's off-diagonal portion encompasses the remainder of the
1902    local matrix (a rectangular submatrix).
1903 
1904    The user can specify preallocated storage for the diagonal part of
1905    the local submatrix with either d_nz or d_nnz (not both).  Set
1906    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1907    memory allocation.  Likewise, specify preallocated storage for the
1908    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1909 
1910    You can call MatGetInfo() to get information on how effective the preallocation was;
1911    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1912    You can also run with the option -info and look for messages with the string
1913    malloc in them to see if additional memory allocation was needed.
1914 
1915    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1916    the figure below we depict these three local rows and all columns (0-11).
1917 
1918 .vb
1919            0 1 2 3 4 5 6 7 8 9 10 11
1920           --------------------------
1921    row 3  |. . . d d d o o o o  o  o
1922    row 4  |. . . d d d o o o o  o  o
1923    row 5  |. . . d d d o o o o  o  o
1924           --------------------------
1925 .ve
1926 
1927    Thus, any entries in the d locations are stored in the d (diagonal)
1928    submatrix, and any entries in the o locations are stored in the
1929    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1930    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1931 
1932    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1933    plus the diagonal part of the d matrix,
1934    and o_nz should indicate the number of block nonzeros per row in the o matrix
1935 
1936    In general, for PDE problems in which most nonzeros are near the diagonal,
1937    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1938    or you will get TERRIBLE performance; see the users' manual chapter on
1939    matrices.
1940 
1941    Level: intermediate
1942 
1943 .keywords: matrix, block, aij, compressed row, sparse, parallel
1944 
1945 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1946 @*/
1947 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1948 {
1949   PetscErrorCode ierr;
1950 
1951   PetscFunctionBegin;
1952   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1953   PetscValidType(B,1);
1954   PetscValidLogicalCollectiveInt(B,bs,2);
1955   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatCreateSBAIJ"
1961 /*@C
1962    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1963    (block compressed row).  For good matrix assembly performance
1964    the user should preallocate the matrix storage by setting the parameters
1965    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1966    performance can be increased by more than a factor of 50.
1967 
1968    Collective on MPI_Comm
1969 
1970    Input Parameters:
1971 +  comm - MPI communicator
1972 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1973           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1974 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1975            This value should be the same as the local size used in creating the
1976            y vector for the matrix-vector product y = Ax.
1977 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1978            This value should be the same as the local size used in creating the
1979            x vector for the matrix-vector product y = Ax.
1980 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1981 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1982 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1983            submatrix  (same for all local rows)
1984 .  d_nnz - array containing the number of block nonzeros in the various block rows
1985            in the upper triangular portion of the in diagonal portion of the local
1986            (possibly different for each block block row) or NULL.
1987            If you plan to factor the matrix you must leave room for the diagonal entry and
1988            set its value even if it is zero.
1989 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1990            submatrix (same for all local rows).
1991 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1992            off-diagonal portion of the local submatrix (possibly different for
1993            each block row) or NULL.
1994 
1995    Output Parameter:
1996 .  A - the matrix
1997 
1998    Options Database Keys:
1999 .   -mat_no_unroll - uses code that does not unroll the loops in the
2000                      block calculations (much slower)
2001 .   -mat_block_size - size of the blocks to use
2002 .   -mat_mpi - use the parallel matrix data structures even on one processor
2003                (defaults to using SeqBAIJ format on one processor)
2004 
2005    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2006    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2007    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2008 
2009    Notes:
2010    The number of rows and columns must be divisible by blocksize.
2011    This matrix type does not support complex Hermitian operation.
2012 
2013    The user MUST specify either the local or global matrix dimensions
2014    (possibly both).
2015 
2016    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2017    than it must be used on all processors that share the object for that argument.
2018 
2019    If the *_nnz parameter is given then the *_nz parameter is ignored
2020 
2021    Storage Information:
2022    For a square global matrix we define each processor's diagonal portion
2023    to be its local rows and the corresponding columns (a square submatrix);
2024    each processor's off-diagonal portion encompasses the remainder of the
2025    local matrix (a rectangular submatrix).
2026 
2027    The user can specify preallocated storage for the diagonal part of
2028    the local submatrix with either d_nz or d_nnz (not both).  Set
2029    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2030    memory allocation.  Likewise, specify preallocated storage for the
2031    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2032 
2033    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2034    the figure below we depict these three local rows and all columns (0-11).
2035 
2036 .vb
2037            0 1 2 3 4 5 6 7 8 9 10 11
2038           --------------------------
2039    row 3  |. . . d d d o o o o  o  o
2040    row 4  |. . . d d d o o o o  o  o
2041    row 5  |. . . d d d o o o o  o  o
2042           --------------------------
2043 .ve
2044 
2045    Thus, any entries in the d locations are stored in the d (diagonal)
2046    submatrix, and any entries in the o locations are stored in the
2047    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2048    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2049 
2050    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2051    plus the diagonal part of the d matrix,
2052    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2053    In general, for PDE problems in which most nonzeros are near the diagonal,
2054    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2055    or you will get TERRIBLE performance; see the users' manual chapter on
2056    matrices.
2057 
2058    Level: intermediate
2059 
2060 .keywords: matrix, block, aij, compressed row, sparse, parallel
2061 
2062 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2063 @*/
2064 
2065 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2066 {
2067   PetscErrorCode ierr;
2068   PetscMPIInt    size;
2069 
2070   PetscFunctionBegin;
2071   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2072   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2073   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2074   if (size > 1) {
2075     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2076     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2077   } else {
2078     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2079     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2087 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2088 {
2089   Mat            mat;
2090   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2091   PetscErrorCode ierr;
2092   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2093   PetscScalar    *array;
2094 
2095   PetscFunctionBegin;
2096   *newmat = 0;
2097 
2098   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2099   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2100   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2101   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2102   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2103   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2104 
2105   mat->factortype   = matin->factortype;
2106   mat->preallocated = PETSC_TRUE;
2107   mat->assembled    = PETSC_TRUE;
2108   mat->insertmode   = NOT_SET_VALUES;
2109 
2110   a      = (Mat_MPISBAIJ*)mat->data;
2111   a->bs2 = oldmat->bs2;
2112   a->mbs = oldmat->mbs;
2113   a->nbs = oldmat->nbs;
2114   a->Mbs = oldmat->Mbs;
2115   a->Nbs = oldmat->Nbs;
2116 
2117 
2118   a->size         = oldmat->size;
2119   a->rank         = oldmat->rank;
2120   a->donotstash   = oldmat->donotstash;
2121   a->roworiented  = oldmat->roworiented;
2122   a->rowindices   = 0;
2123   a->rowvalues    = 0;
2124   a->getrowactive = PETSC_FALSE;
2125   a->barray       = 0;
2126   a->rstartbs     = oldmat->rstartbs;
2127   a->rendbs       = oldmat->rendbs;
2128   a->cstartbs     = oldmat->cstartbs;
2129   a->cendbs       = oldmat->cendbs;
2130 
2131   /* hash table stuff */
2132   a->ht           = 0;
2133   a->hd           = 0;
2134   a->ht_size      = 0;
2135   a->ht_flag      = oldmat->ht_flag;
2136   a->ht_fact      = oldmat->ht_fact;
2137   a->ht_total_ct  = 0;
2138   a->ht_insert_ct = 0;
2139 
2140   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2141   if (oldmat->colmap) {
2142 #if defined(PETSC_USE_CTABLE)
2143     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2144 #else
2145     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2146     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2147     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2148 #endif
2149   } else a->colmap = 0;
2150 
2151   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2152     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2153     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2154     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2155   } else a->garray = 0;
2156 
2157   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2158   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2159   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2160   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2161   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2162 
2163   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2164   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2165   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2166   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2167 
2168   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2169   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2170   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2171   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2172   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2173   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2174   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2175   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2177   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2178   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2179   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2180   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2181 
2182   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2183   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2184   a->sMvctx = oldmat->sMvctx;
2185   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2186 
2187   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2188   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2189   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2190   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2191   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2192   *newmat = mat;
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatLoad_MPISBAIJ"
2198 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2199 {
2200   PetscErrorCode ierr;
2201   PetscInt       i,nz,j,rstart,rend;
2202   PetscScalar    *vals,*buf;
2203   MPI_Comm       comm;
2204   MPI_Status     status;
2205   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2206   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2207   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2208   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2209   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2210   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2211   int            fd;
2212 
2213   PetscFunctionBegin;
2214   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2215   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2216   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2217   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2218   if (bs < 0) bs = 1;
2219 
2220   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2221   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2222   if (!rank) {
2223     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2224     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2225     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2226     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2227   }
2228 
2229   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2230 
2231   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2232   M    = header[1];
2233   N    = header[2];
2234 
2235   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2236   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2237   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2238 
2239   /* If global sizes are set, check if they are consistent with that given in the file */
2240   if (sizesset) {
2241     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2242   }
2243   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2244   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2245 
2246   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2247 
2248   /*
2249      This code adds extra rows to make sure the number of rows is
2250      divisible by the blocksize
2251   */
2252   Mbs        = M/bs;
2253   extra_rows = bs - M + bs*(Mbs);
2254   if (extra_rows == bs) extra_rows = 0;
2255   else                  Mbs++;
2256   if (extra_rows &&!rank) {
2257     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2258   }
2259 
2260   /* determine ownership of all rows */
2261   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2262     mbs = Mbs/size + ((Mbs % size) > rank);
2263     m   = mbs*bs;
2264   } else { /* User Set */
2265     m   = newmat->rmap->n;
2266     mbs = m/bs;
2267   }
2268   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2269   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2270   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2271   rowners[0] = 0;
2272   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2273   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2274   rstart = rowners[rank];
2275   rend   = rowners[rank+1];
2276 
2277   /* distribute row lengths to all processors */
2278   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2279   if (!rank) {
2280     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2281     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2282     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2283     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2284     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2285     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2286     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2287   } else {
2288     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2289   }
2290 
2291   if (!rank) {   /* procs[0] */
2292     /* calculate the number of nonzeros on each processor */
2293     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2294     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2295     for (i=0; i<size; i++) {
2296       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2297         procsnz[i] += rowlengths[j];
2298       }
2299     }
2300     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2301 
2302     /* determine max buffer needed and allocate it */
2303     maxnz = 0;
2304     for (i=0; i<size; i++) {
2305       maxnz = PetscMax(maxnz,procsnz[i]);
2306     }
2307     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2308 
2309     /* read in my part of the matrix column indices  */
2310     nz     = procsnz[0];
2311     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2312     mycols = ibuf;
2313     if (size == 1) nz -= extra_rows;
2314     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2315     if (size == 1) {
2316       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2317     }
2318 
2319     /* read in every ones (except the last) and ship off */
2320     for (i=1; i<size-1; i++) {
2321       nz   = procsnz[i];
2322       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2323       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2324     }
2325     /* read in the stuff for the last proc */
2326     if (size != 1) {
2327       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2328       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2329       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2330       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2331     }
2332     ierr = PetscFree(cols);CHKERRQ(ierr);
2333   } else {  /* procs[i], i>0 */
2334     /* determine buffer space needed for message */
2335     nz = 0;
2336     for (i=0; i<m; i++) nz += locrowlens[i];
2337     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2338     mycols = ibuf;
2339     /* receive message of column indices*/
2340     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2341     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2342     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2343   }
2344 
2345   /* loop over local rows, determining number of off diagonal entries */
2346   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2347   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2348   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2349   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2350   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2351   rowcount = 0;
2352   nzcount  = 0;
2353   for (i=0; i<mbs; i++) {
2354     dcount  = 0;
2355     odcount = 0;
2356     for (j=0; j<bs; j++) {
2357       kmax = locrowlens[rowcount];
2358       for (k=0; k<kmax; k++) {
2359         tmp = mycols[nzcount++]/bs; /* block col. index */
2360         if (!mask[tmp]) {
2361           mask[tmp] = 1;
2362           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2363           else masked1[dcount++] = tmp; /* entry in diag portion */
2364         }
2365       }
2366       rowcount++;
2367     }
2368 
2369     dlens[i]  = dcount;  /* d_nzz[i] */
2370     odlens[i] = odcount; /* o_nzz[i] */
2371 
2372     /* zero out the mask elements we set */
2373     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2374     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2375   }
2376   if (!sizesset) {
2377     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2378   }
2379   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2380   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2381 
2382   if (!rank) {
2383     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2384     /* read in my part of the matrix numerical values  */
2385     nz     = procsnz[0];
2386     vals   = buf;
2387     mycols = ibuf;
2388     if (size == 1) nz -= extra_rows;
2389     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2390     if (size == 1) {
2391       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2392     }
2393 
2394     /* insert into matrix */
2395     jj = rstart*bs;
2396     for (i=0; i<m; i++) {
2397       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2398       mycols += locrowlens[i];
2399       vals   += locrowlens[i];
2400       jj++;
2401     }
2402 
2403     /* read in other processors (except the last one) and ship out */
2404     for (i=1; i<size-1; i++) {
2405       nz   = procsnz[i];
2406       vals = buf;
2407       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2408       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2409     }
2410     /* the last proc */
2411     if (size != 1) {
2412       nz   = procsnz[i] - extra_rows;
2413       vals = buf;
2414       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2415       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2416       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2417     }
2418     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2419 
2420   } else {
2421     /* receive numeric values */
2422     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2423 
2424     /* receive message of values*/
2425     vals   = buf;
2426     mycols = ibuf;
2427     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2428     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2429     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2430 
2431     /* insert into matrix */
2432     jj = rstart*bs;
2433     for (i=0; i<m; i++) {
2434       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2435       mycols += locrowlens[i];
2436       vals   += locrowlens[i];
2437       jj++;
2438     }
2439   }
2440 
2441   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2442   ierr = PetscFree(buf);CHKERRQ(ierr);
2443   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2444   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2445   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2446   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2447   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2449   PetscFunctionReturn(0);
2450 }
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2454 /*XXXXX@
2455    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2456 
2457    Input Parameters:
2458 .  mat  - the matrix
2459 .  fact - factor
2460 
2461    Not Collective on Mat, each process can have a different hash factor
2462 
2463    Level: advanced
2464 
2465   Notes:
2466    This can also be set by the command line option: -mat_use_hash_table fact
2467 
2468 .keywords: matrix, hashtable, factor, HT
2469 
2470 .seealso: MatSetOption()
2471 @XXXXX*/
2472 
2473 
2474 #undef __FUNCT__
2475 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2476 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2477 {
2478   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2479   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2480   PetscReal      atmp;
2481   PetscReal      *work,*svalues,*rvalues;
2482   PetscErrorCode ierr;
2483   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2484   PetscMPIInt    rank,size;
2485   PetscInt       *rowners_bs,dest,count,source;
2486   PetscScalar    *va;
2487   MatScalar      *ba;
2488   MPI_Status     stat;
2489 
2490   PetscFunctionBegin;
2491   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2492   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2493   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2494 
2495   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2496   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2497 
2498   bs  = A->rmap->bs;
2499   mbs = a->mbs;
2500   Mbs = a->Mbs;
2501   ba  = b->a;
2502   bi  = b->i;
2503   bj  = b->j;
2504 
2505   /* find ownerships */
2506   rowners_bs = A->rmap->range;
2507 
2508   /* each proc creates an array to be distributed */
2509   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2510   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2511 
2512   /* row_max for B */
2513   if (rank != size-1) {
2514     for (i=0; i<mbs; i++) {
2515       ncols = bi[1] - bi[0]; bi++;
2516       brow  = bs*i;
2517       for (j=0; j<ncols; j++) {
2518         bcol = bs*(*bj);
2519         for (kcol=0; kcol<bs; kcol++) {
2520           col  = bcol + kcol;                /* local col index */
2521           col += rowners_bs[rank+1];      /* global col index */
2522           for (krow=0; krow<bs; krow++) {
2523             atmp = PetscAbsScalar(*ba); ba++;
2524             row  = brow + krow;   /* local row index */
2525             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2526             if (work[col] < atmp) work[col] = atmp;
2527           }
2528         }
2529         bj++;
2530       }
2531     }
2532 
2533     /* send values to its owners */
2534     for (dest=rank+1; dest<size; dest++) {
2535       svalues = work + rowners_bs[dest];
2536       count   = rowners_bs[dest+1]-rowners_bs[dest];
2537       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2538     }
2539   }
2540 
2541   /* receive values */
2542   if (rank) {
2543     rvalues = work;
2544     count   = rowners_bs[rank+1]-rowners_bs[rank];
2545     for (source=0; source<rank; source++) {
2546       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2547       /* process values */
2548       for (i=0; i<count; i++) {
2549         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2550       }
2551     }
2552   }
2553 
2554   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2555   ierr = PetscFree(work);CHKERRQ(ierr);
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatSOR_MPISBAIJ"
2561 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2562 {
2563   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2564   PetscErrorCode    ierr;
2565   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2566   PetscScalar       *x,*ptr,*from;
2567   Vec               bb1;
2568   const PetscScalar *b;
2569 
2570   PetscFunctionBegin;
2571   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2572   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2573 
2574   if (flag == SOR_APPLY_UPPER) {
2575     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2576     PetscFunctionReturn(0);
2577   }
2578 
2579   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2580     if (flag & SOR_ZERO_INITIAL_GUESS) {
2581       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2582       its--;
2583     }
2584 
2585     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2586     while (its--) {
2587 
2588       /* lower triangular part: slvec0b = - B^T*xx */
2589       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2590 
2591       /* copy xx into slvec0a */
2592       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2593       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2594       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2595       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2596 
2597       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2598 
2599       /* copy bb into slvec1a */
2600       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2601       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2602       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2603       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2604 
2605       /* set slvec1b = 0 */
2606       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2607 
2608       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2609       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2610       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2611       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2612 
2613       /* upper triangular part: bb1 = bb1 - B*x */
2614       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2615 
2616       /* local diagonal sweep */
2617       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2618     }
2619     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2620   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2621     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2622   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2623     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2624   } else if (flag & SOR_EISENSTAT) {
2625     Vec               xx1;
2626     PetscBool         hasop;
2627     const PetscScalar *diag;
2628     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2629     PetscInt          i,n;
2630 
2631     if (!mat->xx1) {
2632       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2633       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2634     }
2635     xx1 = mat->xx1;
2636     bb1 = mat->bb1;
2637 
2638     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2639 
2640     if (!mat->diag) {
2641       /* this is wrong for same matrix with new nonzero values */
2642       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2643       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2644     }
2645     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2646 
2647     if (hasop) {
2648       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2649       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2650     } else {
2651       /*
2652           These two lines are replaced by code that may be a bit faster for a good compiler
2653       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2654       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2655       */
2656       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2657       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2658       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2659       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2660       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2661       if (omega == 1.0) {
2662         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2663         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2664       } else {
2665         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2666         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2667       }
2668       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2669       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2670       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2671       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2672     }
2673 
2674     /* multiply off-diagonal portion of matrix */
2675     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2676     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2677     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2678     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2679     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2680     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2681     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2682     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2683     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2684     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2685 
2686     /* local sweep */
2687     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2688     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2689   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2690   PetscFunctionReturn(0);
2691 }
2692 
2693 #undef __FUNCT__
2694 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2695 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2696 {
2697   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2698   PetscErrorCode ierr;
2699   Vec            lvec1,bb1;
2700 
2701   PetscFunctionBegin;
2702   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2703   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2704 
2705   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2706     if (flag & SOR_ZERO_INITIAL_GUESS) {
2707       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2708       its--;
2709     }
2710 
2711     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2712     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2713     while (its--) {
2714       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2715 
2716       /* lower diagonal part: bb1 = bb - B^T*xx */
2717       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2718       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2719 
2720       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2721       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2722       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2723 
2724       /* upper diagonal part: bb1 = bb1 - B*x */
2725       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2726       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2727 
2728       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2729 
2730       /* diagonal sweep */
2731       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2732     }
2733     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2734     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2735   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2741 /*@
2742      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2743          CSR format the local rows.
2744 
2745    Collective on MPI_Comm
2746 
2747    Input Parameters:
2748 +  comm - MPI communicator
2749 .  bs - the block size, only a block size of 1 is supported
2750 .  m - number of local rows (Cannot be PETSC_DECIDE)
2751 .  n - This value should be the same as the local size used in creating the
2752        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2753        calculated if N is given) For square matrices n is almost always m.
2754 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2755 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2756 .   i - row indices
2757 .   j - column indices
2758 -   a - matrix values
2759 
2760    Output Parameter:
2761 .   mat - the matrix
2762 
2763    Level: intermediate
2764 
2765    Notes:
2766        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2767      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2768      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2769 
2770        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2771 
2772 .keywords: matrix, aij, compressed row, sparse, parallel
2773 
2774 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2775           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2776 @*/
2777 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2778 {
2779   PetscErrorCode ierr;
2780 
2781 
2782   PetscFunctionBegin;
2783   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2784   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2785   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2786   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2787   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2788   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2789   PetscFunctionReturn(0);
2790 }
2791 
2792 
2793 #undef __FUNCT__
2794 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2795 /*@C
2796    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2797    (the default parallel PETSc format).
2798 
2799    Collective on MPI_Comm
2800 
2801    Input Parameters:
2802 +  B - the matrix
2803 .  bs - the block size
2804 .  i - the indices into j for the start of each local row (starts with zero)
2805 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2806 -  v - optional values in the matrix
2807 
2808    Level: developer
2809 
2810 .keywords: matrix, aij, compressed row, sparse, parallel
2811 
2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2813 @*/
2814 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2815 {
2816   PetscErrorCode ierr;
2817 
2818   PetscFunctionBegin;
2819   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 #undef __FUNCT__
2824 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
2825 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2826 {
2827   PetscErrorCode ierr;
2828   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2829   PetscInt       *indx;
2830   PetscScalar    *values;
2831 
2832   PetscFunctionBegin;
2833   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2834   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2835     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2836     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
2837     PetscInt       *bindx,rmax=a->rmax,j;
2838 
2839     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2840     mbs = m/bs; Nbs = N/cbs;
2841     if (n == PETSC_DECIDE) {
2842       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2843     }
2844     /* Check sum(n) = Nbs */
2845     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2846     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
2847 
2848     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2849     rstart -= mbs;
2850 
2851     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2852     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
2853     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2854     for (i=0; i<mbs; i++) {
2855       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2856       nnz = nnz/bs;
2857       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2858       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2859       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2860     }
2861     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2862     ierr = PetscFree(bindx);CHKERRQ(ierr);
2863 
2864     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2865     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2866     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2867     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2868     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2869     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2870   }
2871 
2872   /* numeric phase */
2873   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2874   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2875 
2876   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2877   for (i=0; i<m; i++) {
2878     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2879     Ii   = i + rstart;
2880     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2881     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2882   }
2883   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2884   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2885   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2886   PetscFunctionReturn(0);
2887 }
2888