xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision bca7b5116557ba4826c3fc1fb8ffaafef75aa3cc)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 #if defined(PETSC_HAVE_ELEMENTAL)
23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
24 #endif
25 
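/*
   MatStoreValues_MPISBAIJ() and MatRetrieveValues_MPISBAIJ() below simply forward
   to the sequential diagonal (A) and off-diagonal (B) blocks. A minimal caller-side
   sketch (not part of this file, and assuming the usual MatStoreValues() requirement
   that no new nonzero locations may be created afterwards):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ierr = MatZeroEntries(mat);CHKERRQ(ierr);
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/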
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
28 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
29 {
30   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
31   PetscErrorCode ierr;
32 
33   PetscFunctionBegin;
34   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
35   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
36   PetscFunctionReturn(0);
37 }
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
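/*
   The two macros below insert a single scalar entry into the diagonal (A, SBAIJ)
   or off-diagonal (B, BAIJ) local block. Both follow the same scheme: locate the
   block row, narrow the search for the block column with a short bisection
   followed by a linear scan, and either update the entry in place or shift the
   trailing blocks up by one to open a slot. Within a block the storage is
   column-major, i.e. entry (ridx,cidx) of block _i lives at

     ap[bs2*_i + bs*cidx + ridx]

   which is the indexing used by both macros.
*/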
52 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
53   { \
54  \
55     brow = row/bs;  \
56     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
57     rmax = aimax[brow]; nrow = ailen[brow]; \
58     bcol = col/bs; \
59     ridx = row % bs; cidx = col % bs; \
60     low  = 0; high = nrow; \
61     while (high-low > 3) { \
62       t = (low+high)/2; \
63       if (rp[t] > bcol) high = t; \
64       else              low  = t; \
65     } \
66     for (_i=low; _i<high; _i++) { \
67       if (rp[_i] > bcol) break; \
68       if (rp[_i] == bcol) { \
69         bap = ap + bs2*_i + bs*cidx + ridx; \
70         if (addv == ADD_VALUES) *bap += value;  \
71         else                    *bap  = value;  \
72         goto a_noinsert; \
73       } \
74     } \
75     if (a->nonew == 1) goto a_noinsert; \
76     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
77     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
78     N = nrow++ - 1;  \
79     /* shift up all the later entries in this row */ \
80     for (ii=N; ii>=_i; ii--) { \
81       rp[ii+1] = rp[ii]; \
82       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
83     } \
84     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
85     rp[_i]                      = bcol;  \
86     ap[bs2*_i + bs*cidx + ridx] = value;  \
87     A->nonzerostate++;\
88 a_noinsert:; \
89     ailen[brow] = nrow; \
90   }
91 
92 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
93   { \
94     brow = row/bs;  \
95     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
96     rmax = bimax[brow]; nrow = bilen[brow]; \
97     bcol = col/bs; \
98     ridx = row % bs; cidx = col % bs; \
99     low  = 0; high = nrow; \
100     while (high-low > 3) { \
101       t = (low+high)/2; \
102       if (rp[t] > bcol) high = t; \
103       else              low  = t; \
104     } \
105     for (_i=low; _i<high; _i++) { \
106       if (rp[_i] > bcol) break; \
107       if (rp[_i] == bcol) { \
108         bap = ap + bs2*_i + bs*cidx + ridx; \
109         if (addv == ADD_VALUES) *bap += value;  \
110         else                    *bap  = value;  \
111         goto b_noinsert; \
112       } \
113     } \
114     if (b->nonew == 1) goto b_noinsert; \
115     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
116     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
117     N = nrow++ - 1;  \
118     /* shift up all the later entries in this row */ \
119     for (ii=N; ii>=_i; ii--) { \
120       rp[ii+1] = rp[ii]; \
121       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122     } \
123     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
124     rp[_i]                      = bcol;  \
125     ap[bs2*_i + bs*cidx + ridx] = value;  \
126     B->nonzerostate++;\
127 b_noinsert:; \
128     bilen[brow] = nrow; \
129   }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
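/*
   Caller-side sketch (hypothetical indices, not part of this file): only entries
   with global row <= column (in blocks) should be passed in, e.g.

     PetscInt    row = 3, cols[2] = {3,7};
     PetscScalar vals[2] = {4.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);

   Entries below the (block) diagonal are silently dropped when
   MAT_IGNORE_LOWER_TRIANGULAR is set, and generate an error otherwise.
*/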
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required by the macros above */
148   Mat          A     = baij->A;
149   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar    *aa   =a->a;
152 
153   Mat         B     = baij->B;
154   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar   *ba   =b->a;
157 
158   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar *ap,*bap;
161 
162   /* for stash */
163   PetscInt  n_loc, *in_loc = NULL;
164   MatScalar *v_loc = NULL;
165 
166   PetscFunctionBegin;
167   if (!baij->donotstash) {
168     if (n > baij->n_loc) {
169       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
170       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
171       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
173 
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
192         }
193         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
194           col  = in[j] - cstart_orig;         /* local col index */
195           brow = row/bs; bcol = col/bs;
196           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
197           if (roworiented) value = v[i*n+j];
198           else             value = v[i+j*m];
199           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
200           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
201         } else if (in[j] < 0) continue;
202 #if defined(PETSC_USE_DEBUG)
203         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
204 #endif
205         else {  /* off-diag entry (B) */
206           if (mat->was_assembled) {
207             if (!baij->colmap) {
208               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
209             }
210 #if defined(PETSC_USE_CTABLE)
211             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
212             col  = col - 1;
213 #else
214             col = baij->colmap[in[j]/bs] - 1;
215 #endif
216             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
217               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
218               col  =  in[j];
219               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
220               B    = baij->B;
221               b    = (Mat_SeqBAIJ*)(B)->data;
222               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
223               ba   = b->a;
224             } else col += in[j]%bs;
225           } else col = in[j];
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc          = 0;
237         for (j=0; j<n; j++) {
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray     =baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if (!barray) {
269     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If only one block column (row-oriented) or one block row (column-oriented) is set, no copy is required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if ((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++ = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend) {
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         } else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined(PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined(PETSC_USE_CTABLE)
333             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
339               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col  = in[j];
341             }
342           } else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
348       if (!baij->donotstash) {
349         if (roworiented) {
350           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         } else {
352           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         }
354       }
355     }
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 #undef __FUNCT__
361 #define __FUNCT__ "MatGetValues_MPISBAIJ"
362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
363 {
364   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
365   PetscErrorCode ierr;
366   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
367   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
368 
369   PetscFunctionBegin;
370   for (i=0; i<m; i++) {
371     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
372     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
373     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
374       row = idxm[i] - bsrstart;
375       for (j=0; j<n; j++) {
376         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
377         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
378         if (idxn[j] >= bscstart && idxn[j] < bscend) {
379           col  = idxn[j] - bscstart;
380           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
381         } else {
382           if (!baij->colmap) {
383             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
384           }
385 #if defined(PETSC_USE_CTABLE)
386           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
387           data--;
388 #else
389           data = baij->colmap[idxn[j]/bs]-1;
390 #endif
391           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
392           else {
393             col  = data + idxn[j]%bs;
394             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
395           }
396         }
397       }
398     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
399   }
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNCT__
404 #define __FUNCT__ "MatNorm_MPISBAIJ"
405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
406 {
407   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscReal      sum[2],*lnorm2;
410 
411   PetscFunctionBegin;
412   if (baij->size == 1) {
413     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
414   } else {
415     if (type == NORM_FROBENIUS) {
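      /* ||mat||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 ): the SBAIJ block A
         accounts for the full symmetric diagonal block, while each entry of B is
         stored only once but represents itself and its (unstored) symmetric
         partner, hence the factor 2 in the reduction below. */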
416       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
417       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
418       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of the Frobenius norm of A */
419       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
420       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of the Frobenius norm of B */
421       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
422       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
423       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
424     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
425       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
426       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
427       PetscReal    *rsum,*rsum2,vabs;
428       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
429       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
430       MatScalar    *v;
431 
432       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
433       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
434       /* Amat */
435       v = amat->a; jj = amat->j;
436       for (brow=0; brow<mbs; brow++) {
437         grow = bs*(rstart + brow);
438         nz   = amat->i[brow+1] - amat->i[brow];
439         for (bcol=0; bcol<nz; bcol++) {
440           gcol = bs*(rstart + *jj); jj++;
441           for (col=0; col<bs; col++) {
442             for (row=0; row<bs; row++) {
443               vabs            = PetscAbsScalar(*v); v++;
444               rsum[gcol+col] += vabs;
445               /* non-diagonal block */
446               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       /* Bmat */
452       v = bmat->a; jj = bmat->j;
453       for (brow=0; brow<mbs; brow++) {
454         grow = bs*(rstart + brow);
455         nz = bmat->i[brow+1] - bmat->i[brow];
456         for (bcol=0; bcol<nz; bcol++) {
457           gcol = bs*garray[*jj]; jj++;
458           for (col=0; col<bs; col++) {
459             for (row=0; row<bs; row++) {
460               vabs            = PetscAbsScalar(*v); v++;
461               rsum[gcol+col] += vabs;
462               rsum[grow+row] += vabs;
463             }
464           }
465         }
466       }
467       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
468       *norm = 0.0;
469       for (col=0; col<mat->cmap->N; col++) {
470         if (rsum2[col] > *norm) *norm = rsum2[col];
471       }
472       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
473     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
474   }
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
483   PetscErrorCode ierr;
484   PetscInt       nstash,reallocs;
485   InsertMode     addv;
486 
487   PetscFunctionBegin;
488   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
489 
490   /* make sure all processors use the same InsertMode, i.e. either INSERT_VALUES or ADD_VALUES */
491   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
492   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
493   mat->insertmode = addv; /* in case this processor had no cache */
494 
495   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
496   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
497   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
498   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
499   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
500   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
507 {
508   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
509   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
510   PetscErrorCode ierr;
511   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
512   PetscInt       *row,*col;
513   PetscBool      other_disassembled;
514   PetscMPIInt    n;
515   PetscBool      r1,r2,r3;
516   MatScalar      *val;
517   InsertMode     addv = mat->insertmode;
518 
519   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
520   PetscFunctionBegin;
521   if (!baij->donotstash &&  !mat->nooffprocentries) {
522     while (1) {
523       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
524       if (!flg) break;
525 
526       for (i=0; i<n;) {
527         /* Now identify the consecutive vals belonging to the same row */
528         for (j=i,rstart=row[j]; j<n; j++) {
529           if (row[j] != rstart) break;
530         }
531         if (j < n) ncols = j-i;
532         else       ncols = n-i;
533         /* Now assemble all these values with a single function call */
534         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
535         i    = j;
536       }
537     }
538     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
539     /* Now process the block-stash. Since the values are stashed column-oriented,
540        temporarily clear the roworiented flags, and restore the original flags
541        after MatSetValues() */
542     r1 = baij->roworiented;
543     r2 = a->roworiented;
544     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
545 
546     baij->roworiented = PETSC_FALSE;
547     a->roworiented    = PETSC_FALSE;
548 
549     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
550     while (1) {
551       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
552       if (!flg) break;
553 
554       for (i=0; i<n;) {
555         /* Now identify the consecutive vals belonging to the same row */
556         for (j=i,rstart=row[j]; j<n; j++) {
557           if (row[j] != rstart) break;
558         }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i    = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566 
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569 
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled; if so, we must
577      also disassemble ourselves so that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
596 
597   baij->rowvalues = 0;
598 
599   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
600   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
601     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
602     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
609 #include <petscdraw.h>
610 #undef __FUNCT__
611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
613 {
614   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
615   PetscErrorCode    ierr;
616   PetscInt          bs   = mat->rmap->bs;
617   PetscMPIInt       rank = baij->rank;
618   PetscBool         iascii,isdraw;
619   PetscViewer       sviewer;
620   PetscViewerFormat format;
621 
622   PetscFunctionBegin;
623   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
624   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
625   if (iascii) {
626     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
627     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
628       MatInfo info;
629       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
630       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
633       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
634       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
635       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
636       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
637       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
638       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
640       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_INFO) {
643       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
644       PetscFunctionReturn(0);
645     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
646       PetscFunctionReturn(0);
647     }
648   }
649 
650   if (isdraw) {
651     PetscDraw draw;
652     PetscBool isnull;
653     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
654     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
655   }
656 
657   {
658     /* assemble the entire matrix onto first processor. */
659     Mat          A;
660     Mat_SeqSBAIJ *Aloc;
661     Mat_SeqBAIJ  *Bloc;
662     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
663     MatScalar    *a;
664     const char   *matname;
665 
666     /* Should this be the same type as mat? */
667     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
668     if (!rank) {
669       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
670     } else {
671       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
672     }
673     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
674     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
675     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
676     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
677 
678     /* copy over the A part */
679     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
680     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
681     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
682 
683     for (i=0; i<mbs; i++) {
684       rvals[0] = bs*(baij->rstartbs + i);
685       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
686       for (j=ai[i]; j<ai[i+1]; j++) {
687         col = (baij->cstartbs+aj[j])*bs;
688         for (k=0; k<bs; k++) {
689           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
690           col++;
691           a += bs;
692         }
693       }
694     }
695     /* copy over the B part */
696     Bloc = (Mat_SeqBAIJ*)baij->B->data;
697     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
698     for (i=0; i<mbs; i++) {
699 
700       rvals[0] = bs*(baij->rstartbs + i);
701       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
702       for (j=ai[i]; j<ai[i+1]; j++) {
703         col = baij->garray[aj[j]]*bs;
704         for (k=0; k<bs; k++) {
705           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
706           col++;
707           a += bs;
708         }
709       }
710     }
711     ierr = PetscFree(rvals);CHKERRQ(ierr);
712     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
713     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
714     /*
715        Everyone has to call to draw the matrix since the graphics waits are
716        synchronized across all processors that share the PetscDraw object
717     */
718     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
719     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
722       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
723     }
724     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     ierr = MatDestroy(&A);CHKERRQ(ierr);
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatView_MPISBAIJ"
732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
733 {
734   PetscErrorCode ierr;
735   PetscBool      iascii,isdraw,issocket,isbinary;
736 
737   PetscFunctionBegin;
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
740   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
741   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
742   if (iascii || isdraw || issocket || isbinary) {
743     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatDestroy_MPISBAIJ"
750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
751 {
752   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
753   PetscErrorCode ierr;
754 
755   PetscFunctionBegin;
756 #if defined(PETSC_USE_LOG)
757   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
758 #endif
759   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
760   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
761   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
762   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
763 #if defined(PETSC_USE_CTABLE)
764   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
765 #else
766   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
767 #endif
768   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
770   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
776   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
777   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
778   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
779   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
781   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
782   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
783 #if defined(PETSC_USE_REAL_MAT_SINGLE)
784   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
785 #endif
786   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
788   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
789   ierr = PetscFree(mat->data);CHKERRQ(ierr);
790 
791   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
795   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
796   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
797 #if defined(PETSC_HAVE_ELEMENTAL)
798   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
799 #endif
800   PetscFunctionReturn(0);
801 }
802 
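/*
   The MatMult routines below use a single scatter (sMvctx) per product. A sketch
   of the scheme, in the notation of the code: with x the local part of xx,

     slvec1a = A*x            (diagonal block)
     slvec0  = [ x ; B^T*x ]  (B^T*x computed into slvec0b)

   the forward ADD_VALUES scatter of slvec0 into slvec1 simultaneously delivers
   the needed ghost values of x into slvec1b and accumulates the remote B^T*x
   contributions into slvec1a, so the final result is

     yy = B*slvec1b + slvec1a

   The Hermitian variant differs only in using B's Hermitian transpose.
*/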
803 #undef __FUNCT__
804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
806 {
807   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
808   PetscErrorCode ierr;
809   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
810   PetscScalar    *x,*from;
811 
812   PetscFunctionBegin;
813   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
814   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
815 
816   /* diagonal part */
817   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
818   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
819 
820   /* subdiagonal part */
821   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
822 
823   /* copy x into the vec slvec0 */
824   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
826 
827   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
828   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
829   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
830 
831   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833   /* superdiagonal part */
834   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMult_MPISBAIJ"
840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
841 {
842   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
843   PetscErrorCode    ierr;
844   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
845   PetscScalar       *from;
846   const PetscScalar *x;
847 
848   PetscFunctionBegin;
849   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
850   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
851 
852   /* diagonal part */
853   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
854   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
855 
856   /* subdiagonal part */
857   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
858 
859   /* copy x into the vec slvec0 */
860   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
861   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
862 
863   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
864   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
865   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
866 
867   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   /* superdiagonal part */
870   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
876 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
877 {
878   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
879   PetscErrorCode ierr;
880   PetscInt       nt;
881 
882   PetscFunctionBegin;
883   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
884   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
885 
886   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
887   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
888 
889   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890   /* do diagonal part */
891   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
892   /* do superdiagonal part */
893   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
895   /* do subdiagonal part */
896   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
897   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
898   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
904 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
905 {
906   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
907   PetscErrorCode    ierr;
908   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
909   PetscScalar       *from,zero=0.0;
910   const PetscScalar *x;
911 
912   PetscFunctionBegin;
913   /*
914   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
915   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
916   */
917   /* diagonal part */
918   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
919   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
920 
921   /* subdiagonal part */
922   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
923 
924   /* copy x into the vec slvec0 */
925   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
926   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
927   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
928   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
929 
930   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
932   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
933 
934   /* superdiagonal part */
935   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
941 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
942 {
943   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
944   PetscErrorCode ierr;
945 
946   PetscFunctionBegin;
947   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   /* do diagonal part */
949   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
950   /* do superdiagonal part */
951   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
953 
954   /* do subdiagonal part */
955   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
956   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
957   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 /*
962   This only works correctly for square matrices where the subblock a->A is the
963   diagonal block
964 */
965 #undef __FUNCT__
966 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
967 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
968 {
969   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
974   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatScale_MPISBAIJ"
980 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
981 {
982   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
987   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatGetRow_MPISBAIJ"
993 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
994 {
995   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
996   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
997   PetscErrorCode ierr;
998   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
999   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1000   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1001 
1002   PetscFunctionBegin;
1003   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1004   mat->getrowactive = PETSC_TRUE;
1005 
1006   if (!mat->rowvalues && (idx || v)) {
1007     /*
1008         allocate enough space to hold information from the longest row.
1009     */
1010     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1011     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1012     PetscInt     max = 1,mbs = mat->mbs,tmp;
1013     for (i=0; i<mbs; i++) {
1014       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1015       if (max < tmp) max = tmp;
1016     }
1017     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1018   }
1019 
1020   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1021   lrow = row - brstart;  /* local row index */
1022 
1023   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1024   if (!v)   {pvA = 0; pvB = 0;}
1025   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1026   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1027   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1028   nztot = nzA + nzB;
1029 
1030   cmap = mat->garray;
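  /* Merge the row of A (local columns) and the row of B (ghost columns) in
     increasing global column order: B's columns that map below cstart come first,
     then all of A's columns, then B's remaining columns; imark records the split
     point within B. */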
1031   if (v  || idx) {
1032     if (nztot) {
1033       /* Sort by increasing column numbers, assuming A and B already sorted */
1034       PetscInt imark = -1;
1035       if (v) {
1036         *v = v_p = mat->rowvalues;
1037         for (i=0; i<nzB; i++) {
1038           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1039           else break;
1040         }
1041         imark = i;
1042         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1043         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1044       }
1045       if (idx) {
1046         *idx = idx_p = mat->rowindices;
1047         if (imark > -1) {
1048           for (i=0; i<imark; i++) {
1049             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1050           }
1051         } else {
1052           for (i=0; i<nzB; i++) {
1053             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1054             else break;
1055           }
1056           imark = i;
1057         }
1058         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1059         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1060       }
1061     } else {
1062       if (idx) *idx = 0;
1063       if (v)   *v   = 0;
1064     }
1065   }
1066   *nz  = nztot;
1067   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1068   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1075 {
1076   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1077 
1078   PetscFunctionBegin;
1079   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1080   baij->getrowactive = PETSC_FALSE;
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1086 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1087 {
1088   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1089   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1090 
1091   PetscFunctionBegin;
1092   aA->getrow_utriangular = PETSC_TRUE;
1093   PetscFunctionReturn(0);
1094 }
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1097 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1098 {
1099   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1100   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1101 
1102   PetscFunctionBegin;
1103   aA->getrow_utriangular = PETSC_FALSE;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1109 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1110 {
1111   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1116   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1122 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1123 {
1124   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1129   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 /* Check whether isrow is a subset of iscol_local (a subset test, despite the name);
1134    called by MatGetSubMatrix_MPISBAIJ().  Input: isrow - distributed (parallel),
1135           iscol_local - locally owned (seq)
1136 */
1137 #undef __FUNCT__
1138 #define __FUNCT__ "ISEqual_private"
1139 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1140 {
1141   PetscErrorCode ierr;
1142   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1143   const PetscInt *ptr1,*ptr2;
1144 
1145   PetscFunctionBegin;
1146   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1147   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1148   if (sz1 > sz2) {
1149     *flg = PETSC_FALSE;
1150     PetscFunctionReturn(0);
1151   }
1152 
1153   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1154   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1155 
1156   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1157   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1158   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1159   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1160   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1161   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1162 
1163   nmatch=0;
1164   k     = 0;
1165   for (i=0; i<sz1; i++){
1166     for (j=k; j<sz2; j++){
1167       if (a1[i] == a2[j]) {
1168         k = j; nmatch++;
1169         break;
1170       }
1171     }
1172   }
1173   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1174   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1175   ierr = PetscFree(a1);CHKERRQ(ierr);
1176   ierr = PetscFree(a2);CHKERRQ(ierr);
1177   if (nmatch < sz1) {
1178     *flg = PETSC_FALSE;
1179   } else {
1180     *flg = PETSC_TRUE;
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
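/*
   Caller-side sketch (hypothetical IS 'is', not part of this file): for the
   symmetric format the row and column index sets must select the same global
   indices, and the reuse path depends on the gathered column IS composed on the
   returned matrix below:

     Mat sub;
     ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...
     ierr = MatGetSubMatrix(mat,is,is,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/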
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1187 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1188 {
1189   PetscErrorCode ierr;
1190   IS             iscol_local;
1191   PetscInt       csize;
1192   PetscBool      isequal;
1193 
1194   PetscFunctionBegin;
1195   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1196   if (call == MAT_REUSE_MATRIX) {
1197     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1198     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1199   } else {
1200     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1201     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1202     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1203   }
1204 
1205   /* now call MatGetSubMatrix_MPIBAIJ_Private() */
1206   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1207   if (call == MAT_INITIAL_MATRIX) {
1208     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1209     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1210   }
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1216 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1217 {
1218   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1219   PetscErrorCode ierr;
1220 
1221   PetscFunctionBegin;
1222   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1223   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1229 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1230 {
1231   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1232   Mat            A  = a->A,B = a->B;
1233   PetscErrorCode ierr;
1234   PetscReal      isend[5],irecv[5];
1235 
1236   PetscFunctionBegin;
1237   info->block_size = (PetscReal)matin->rmap->bs;
1238 
1239   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1240 
1241   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1242   isend[3] = info->memory;  isend[4] = info->mallocs;
1243 
1244   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1245 
1246   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1247   isend[3] += info->memory;  isend[4] += info->mallocs;
1248   if (flag == MAT_LOCAL) {
1249     info->nz_used      = isend[0];
1250     info->nz_allocated = isend[1];
1251     info->nz_unneeded  = isend[2];
1252     info->memory       = isend[3];
1253     info->mallocs      = isend[4];
1254   } else if (flag == MAT_GLOBAL_MAX) {
1255     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1256 
1257     info->nz_used      = irecv[0];
1258     info->nz_allocated = irecv[1];
1259     info->nz_unneeded  = irecv[2];
1260     info->memory       = irecv[3];
1261     info->mallocs      = irecv[4];
1262   } else if (flag == MAT_GLOBAL_SUM) {
1263     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1264 
1265     info->nz_used      = irecv[0];
1266     info->nz_allocated = irecv[1];
1267     info->nz_unneeded  = irecv[2];
1268     info->memory       = irecv[3];
1269     info->mallocs      = irecv[4];
1270   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1271   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1272   info->fill_ratio_needed = 0;
1273   info->factor_mallocs    = 0;
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1279 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1280 {
1281   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1282   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   switch (op) {
1287   case MAT_NEW_NONZERO_LOCATIONS:
1288   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1289   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1290   case MAT_KEEP_NONZERO_PATTERN:
1291   case MAT_NEW_NONZERO_LOCATION_ERR:
1292     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1293     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1294     break;
1295   case MAT_ROW_ORIENTED:
1296     a->roworiented = flg;
1297 
1298     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1299     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1300     break;
1301   case MAT_NEW_DIAGONALS:
1302     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1303     break;
1304   case MAT_IGNORE_OFF_PROC_ENTRIES:
1305     a->donotstash = flg;
1306     break;
1307   case MAT_USE_HASH_TABLE:
1308     a->ht_flag = flg;
1309     break;
1310   case MAT_HERMITIAN:
1311     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1312     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1313 
1314     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1315     break;
1316   case MAT_SPD:
1317     A->spd_set = PETSC_TRUE;
1318     A->spd     = flg;
1319     if (flg) {
1320       A->symmetric                  = PETSC_TRUE;
1321       A->structurally_symmetric     = PETSC_TRUE;
1322       A->symmetric_set              = PETSC_TRUE;
1323       A->structurally_symmetric_set = PETSC_TRUE;
1324     }
1325     break;
1326   case MAT_SYMMETRIC:
1327     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1328     break;
1329   case MAT_STRUCTURALLY_SYMMETRIC:
1330     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1331     break;
1332   case MAT_SYMMETRY_ETERNAL:
1333     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1334     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1335     break;
1336   case MAT_IGNORE_LOWER_TRIANGULAR:
1337     aA->ignore_ltriangular = flg;
1338     break;
1339   case MAT_ERROR_LOWER_TRIANGULAR:
1340     aA->ignore_ltriangular = flg;
1341     break;
1342   case MAT_GETROW_UPPERTRIANGULAR:
1343     aA->getrow_utriangular = flg;
1344     break;
1345   default:
1346     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unknown option %d",op);
1347   }
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1353 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1354 {
1355   PetscErrorCode ierr;
1356 
1357   PetscFunctionBegin;
1358   if (reuse == MAT_INITIAL_MATRIX || *B != A) { /* the matrix is symmetric, so its transpose is a copy of itself */
1359     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1366 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1367 {
1368   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1369   Mat            a     = baij->A, b=baij->B;
1370   PetscErrorCode ierr;
1371   PetscInt       nv,m,n;
1372   PetscBool      flg;
1373 
1374   PetscFunctionBegin;
1375   if (ll != rr) {
1376     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1377     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1378   }
1379   if (!ll) PetscFunctionReturn(0);
1380 
1381   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1382   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1383 
1384   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1385   if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1386 
1387   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1388 
1389   /* left diagonalscale the off-diagonal part */
1390   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1391 
1392   /* scale the diagonal part */
1393   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1394 
1395   /* right diagonalscale the off-diagonal part */
1396   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1397   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
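
/*
   Example (an illustrative sketch): since only the upper triangle is stored, the
   scaling must preserve symmetry, so the same vector is passed on both sides to
   compute diag(l)*A*diag(l); "mat" and "l" are assumed to have conforming layouts.

     ierr = MatDiagonalScale(mat,l,l);CHKERRQ(ierr);
*/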
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1403 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1404 {
1405   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1406   PetscErrorCode ierr;
1407 
1408   PetscFunctionBegin;
1409   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatEqual_MPISBAIJ"
1417 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1418 {
1419   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1420   Mat            a,b,c,d;
1421   PetscBool      flg;
1422   PetscErrorCode ierr;
1423 
1424   PetscFunctionBegin;
1425   a = matA->A; b = matA->B;
1426   c = matB->A; d = matB->B;
1427 
1428   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1429   if (flg) {
1430     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1431   }
1432   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatCopy_MPISBAIJ"
1438 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1439 {
1440   PetscErrorCode ierr;
1441   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1442   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1443 
1444   PetscFunctionBegin;
1445   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1446   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1447     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1448     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1449     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1450   } else {
1451     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1452     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1459 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1460 {
1461   PetscErrorCode ierr;
1462 
1463   PetscFunctionBegin;
1464   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1470 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1471 {
1472   PetscErrorCode ierr;
1473   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1474   PetscBLASInt   bnz,one=1;
1475   Mat_SeqSBAIJ   *xa,*ya;
1476   Mat_SeqBAIJ    *xb,*yb;
1477 
1478   PetscFunctionBegin;
1479   if (str == SAME_NONZERO_PATTERN) {
1480     PetscScalar alpha = a;
1481     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1482     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1483     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1484     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1485     xb   = (Mat_SeqBAIJ*)xx->B->data;
1486     yb   = (Mat_SeqBAIJ*)yy->B->data;
1487     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1488     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1489     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1490   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1491     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1492     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1493     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1494   } else {
1495     Mat      B;
1496     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1497     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1498     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1499     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1500     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1501     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1502     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1503     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1504     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1505     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1506     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1507     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1508     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1509     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1510     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1511     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1512     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1513     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1514     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1515     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1516   }
1517   PetscFunctionReturn(0);
1518 }
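
/*
   Example (an illustrative sketch): Y = Y + 2*X. Passing SAME_NONZERO_PATTERN when
   the two matrices are known to share a nonzero structure selects the fast BLAS
   path above; DIFFERENT_NONZERO_PATTERN falls through to the branch that builds a
   freshly preallocated matrix.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/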
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1522 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1523 {
1524   PetscErrorCode ierr;
1525   PetscInt       i;
1526   PetscBool      flg;
1527 
1528   PetscFunctionBegin;
1529   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1530   for (i=0; i<n; i++) {
1531     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1532     if (!flg) { /* (*B)[i] is non-symmetric, set flag */
1533       ierr = MatSetOption((*B)[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1534     }
1535   }
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 /* -------------------------------------------------------------------*/
1540 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1541                                        MatGetRow_MPISBAIJ,
1542                                        MatRestoreRow_MPISBAIJ,
1543                                        MatMult_MPISBAIJ,
1544                                /*  4*/ MatMultAdd_MPISBAIJ,
1545                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1546                                        MatMultAdd_MPISBAIJ,
1547                                        0,
1548                                        0,
1549                                        0,
1550                                /* 10*/ 0,
1551                                        0,
1552                                        0,
1553                                        MatSOR_MPISBAIJ,
1554                                        MatTranspose_MPISBAIJ,
1555                                /* 15*/ MatGetInfo_MPISBAIJ,
1556                                        MatEqual_MPISBAIJ,
1557                                        MatGetDiagonal_MPISBAIJ,
1558                                        MatDiagonalScale_MPISBAIJ,
1559                                        MatNorm_MPISBAIJ,
1560                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1561                                        MatAssemblyEnd_MPISBAIJ,
1562                                        MatSetOption_MPISBAIJ,
1563                                        MatZeroEntries_MPISBAIJ,
1564                                /* 24*/ 0,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                        0,
1569                                /* 29*/ MatSetUp_MPISBAIJ,
1570                                        0,
1571                                        0,
1572                                        0,
1573                                        0,
1574                                /* 34*/ MatDuplicate_MPISBAIJ,
1575                                        0,
1576                                        0,
1577                                        0,
1578                                        0,
1579                                /* 39*/ MatAXPY_MPISBAIJ,
1580                                        MatGetSubMatrices_MPISBAIJ,
1581                                        MatIncreaseOverlap_MPISBAIJ,
1582                                        MatGetValues_MPISBAIJ,
1583                                        MatCopy_MPISBAIJ,
1584                                /* 44*/ 0,
1585                                        MatScale_MPISBAIJ,
1586                                        0,
1587                                        0,
1588                                        0,
1589                                /* 49*/ 0,
1590                                        0,
1591                                        0,
1592                                        0,
1593                                        0,
1594                                /* 54*/ 0,
1595                                        0,
1596                                        MatSetUnfactored_MPISBAIJ,
1597                                        0,
1598                                        MatSetValuesBlocked_MPISBAIJ,
1599                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1600                                        0,
1601                                        0,
1602                                        0,
1603                                        0,
1604                                /* 64*/ 0,
1605                                        0,
1606                                        0,
1607                                        0,
1608                                        0,
1609                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1610                                        0,
1611                                        0,
1612                                        0,
1613                                        0,
1614                                /* 74*/ 0,
1615                                        0,
1616                                        0,
1617                                        0,
1618                                        0,
1619                                /* 79*/ 0,
1620                                        0,
1621                                        0,
1622                                        0,
1623                                        MatLoad_MPISBAIJ,
1624                                /* 84*/ 0,
1625                                        0,
1626                                        0,
1627                                        0,
1628                                        0,
1629                                /* 89*/ 0,
1630                                        0,
1631                                        0,
1632                                        0,
1633                                        0,
1634                                /* 94*/ 0,
1635                                        0,
1636                                        0,
1637                                        0,
1638                                        0,
1639                                /* 99*/ 0,
1640                                        0,
1641                                        0,
1642                                        0,
1643                                        0,
1644                                /*104*/ 0,
1645                                        MatRealPart_MPISBAIJ,
1646                                        MatImaginaryPart_MPISBAIJ,
1647                                        MatGetRowUpperTriangular_MPISBAIJ,
1648                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1649                                /*109*/ 0,
1650                                        0,
1651                                        0,
1652                                        0,
1653                                        0,
1654                                /*114*/ 0,
1655                                        0,
1656                                        0,
1657                                        0,
1658                                        0,
1659                                /*119*/ 0,
1660                                        0,
1661                                        0,
1662                                        0,
1663                                        0,
1664                                /*124*/ 0,
1665                                        0,
1666                                        0,
1667                                        0,
1668                                        0,
1669                                /*129*/ 0,
1670                                        0,
1671                                        0,
1672                                        0,
1673                                        0,
1674                                /*134*/ 0,
1675                                        0,
1676                                        0,
1677                                        0,
1678                                        0,
1679                                /*139*/ 0,
1680                                        0,
1681                                        0,
1682                                        0,
1683                                        0,
1684                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1685 };
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1689 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1690 {
1691   PetscFunctionBegin;
1692   *a = ((Mat_MPISBAIJ*)A->data)->A;
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1698 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1699 {
1700   Mat_MPISBAIJ   *b;
1701   PetscErrorCode ierr;
1702   PetscInt       i,mbs,Mbs;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1706   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1707   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1708   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1709 
1710   b   = (Mat_MPISBAIJ*)B->data;
1711   mbs = B->rmap->n/bs;
1712   Mbs = B->rmap->N/bs;
1713   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1714 
1715   B->rmap->bs = bs;
1716   b->bs2      = bs*bs;
1717   b->mbs      = mbs;
1718   b->Mbs      = Mbs;
1719   b->nbs      = B->cmap->n/bs;
1720   b->Nbs      = B->cmap->N/bs;
1721 
1722   for (i=0; i<=b->size; i++) {
1723     b->rangebs[i] = B->rmap->range[i]/bs;
1724   }
1725   b->rstartbs = B->rmap->rstart/bs;
1726   b->rendbs   = B->rmap->rend/bs;
1727 
1728   b->cstartbs = B->cmap->rstart/bs;
1729   b->cendbs   = B->cmap->rend/bs;
1730 
1731   if (!B->preallocated) {
1732     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1733     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1734     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1735     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1736     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1737     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1738     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1739     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1740     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1741   }
1742 
1743   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1744   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1745 
1746   B->preallocated = PETSC_TRUE;
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1752 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1753 {
1754   PetscInt       m,rstart,cstart,cend;
1755   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1756   const PetscInt *JJ    =0;
1757   PetscScalar    *values=0;
1758   PetscErrorCode ierr;
1759 
1760   PetscFunctionBegin;
1761   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1762   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1763   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1764   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1765   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1766   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1767   m      = B->rmap->n/bs;
1768   rstart = B->rmap->rstart/bs;
1769   cstart = B->cmap->rstart/bs;
1770   cend   = B->cmap->rend/bs;
1771 
1772   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1773   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1774   for (i=0; i<m; i++) {
1775     nz = ii[i+1] - ii[i];
1776     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1777     nz_max = PetscMax(nz_max,nz);
1778     JJ     = jj + ii[i];
1779     for (j=0; j<nz; j++) {
1780       if (*JJ >= cstart) break;
1781       JJ++;
1782     }
1783     d = 0;
1784     for (; j<nz; j++) {
1785       if (*JJ++ >= cend) break;
1786       d++;
1787     }
1788     d_nnz[i] = d;
1789     o_nnz[i] = nz - d;
1790   }
1791   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1792   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1793 
1794   values = (PetscScalar*)V;
1795   if (!values) {
1796     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1797     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1798   }
1799   for (i=0; i<m; i++) {
1800     PetscInt          row    = i + rstart;
1801     PetscInt          ncols  = ii[i+1] - ii[i];
1802     const PetscInt    *icols = jj + ii[i];
1803     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1804     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1805   }
1806 
1807   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1808   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1809   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1811   PetscFunctionReturn(0);
1812 }
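
/*
   Example (an illustrative sketch): block-CSR input for the routine above on a rank
   that owns two block rows. ii[] holds block-row offsets, jj[] holds global block
   column indices of the upper triangular (including diagonal) blocks, and V may be
   NULL to set only the nonzero pattern; bs=2 here is an assumption for illustration.

     PetscInt ii[] = {0,2,3};    // block row 0 has 2 blocks, block row 1 has 1
     PetscInt jj[] = {0,2,1};    // row 0: block columns 0 and 2; row 1: block column 1
     ierr = MatMPISBAIJSetPreallocationCSR(B,2,ii,jj,NULL);CHKERRQ(ierr);
*/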
1813 
1814 /*MC
1815    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1816    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1817    the matrix is stored.
1818 
1819   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian you
1820   can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE).
1821 
1822    Options Database Keys:
1823 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1824 
1825   Level: beginner
1826 
1827 .seealso: MatCreateMPISBAIJ
1828 M*/
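
/*
   Example (an illustrative sketch): a typical creation sequence for this type; M, bs,
   d_nz and o_nz are assumptions for illustration. Only entries on or above the block
   diagonal need to be set; with MAT_IGNORE_LOWER_TRIANGULAR any lower triangular
   entries passed in are silently dropped.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     // ... MatSetValues()/MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd()
*/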
1829 
1830 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatCreate_MPISBAIJ"
1834 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1835 {
1836   Mat_MPISBAIJ   *b;
1837   PetscErrorCode ierr;
1838   PetscBool      flg = PETSC_FALSE;
1839 
1840   PetscFunctionBegin;
1841   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1842   B->data = (void*)b;
1843   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1844 
1845   B->ops->destroy = MatDestroy_MPISBAIJ;
1846   B->ops->view    = MatView_MPISBAIJ;
1847   B->assembled    = PETSC_FALSE;
1848   B->insertmode   = NOT_SET_VALUES;
1849 
1850   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1851   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1852 
1853   /* build local table of row and column ownerships */
1854   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1855 
1856   /* build cache for off array entries formed */
1857   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1858 
1859   b->donotstash  = PETSC_FALSE;
1860   b->colmap      = NULL;
1861   b->garray      = NULL;
1862   b->roworiented = PETSC_TRUE;
1863 
1864   /* stuff used in block assembly */
1865   b->barray = 0;
1866 
1867   /* stuff used for matrix vector multiply */
1868   b->lvec    = 0;
1869   b->Mvctx   = 0;
1870   b->slvec0  = 0;
1871   b->slvec0b = 0;
1872   b->slvec1  = 0;
1873   b->slvec1a = 0;
1874   b->slvec1b = 0;
1875   b->sMvctx  = 0;
1876 
1877   /* stuff for MatGetRow() */
1878   b->rowindices   = 0;
1879   b->rowvalues    = 0;
1880   b->getrowactive = PETSC_FALSE;
1881 
1882   /* hash table stuff */
1883   b->ht           = 0;
1884   b->hd           = 0;
1885   b->ht_size      = 0;
1886   b->ht_flag      = PETSC_FALSE;
1887   b->ht_fact      = 0;
1888   b->ht_total_ct  = 0;
1889   b->ht_insert_ct = 0;
1890 
1891   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1892   b->ijonly = PETSC_FALSE;
1893 
1894   b->in_loc = 0;
1895   b->v_loc  = 0;
1896   b->n_loc  = 0;
1897 
1898   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1899   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1900   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1901   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1902   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1903   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1904 #if defined(PETSC_HAVE_ELEMENTAL)
1905   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
1906 #endif
1907 
1908   B->symmetric                  = PETSC_TRUE;
1909   B->structurally_symmetric     = PETSC_TRUE;
1910   B->symmetric_set              = PETSC_TRUE;
1911   B->structurally_symmetric_set = PETSC_TRUE;
1912 
1913   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1914   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1915   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1916   if (flg) {
1917     PetscReal fact = 1.39;
1918     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1919     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1920     if (fact <= 1.0) fact = 1.39;
1921     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1922     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1923   }
1924   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 /*MC
1929    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1930 
1931    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1932    and MATMPISBAIJ otherwise.
1933 
1934    Options Database Keys:
1935 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1936 
1937   Level: beginner
1938 
1939 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1940 M*/
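
/*
   Example (an illustrative sketch): selecting the communicator-dependent type; on a
   single process this yields MATSEQSBAIJ, on several processes MATMPISBAIJ, and
   MatSetFromOptions() lets -mat_type override it. "comm" and "M" are assumptions.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/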
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1944 /*@C
1945    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1946    the user should preallocate the matrix storage by setting the parameters
1947    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1948    performance can be increased by more than a factor of 50.
1949 
1950    Collective on Mat
1951 
1952    Input Parameters:
1953 +  B - the matrix
1954 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1955           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1956 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1957            submatrix  (same for all local rows)
1958 .  d_nnz - array containing the number of block nonzeros in the various block rows
1959            in the upper triangular and diagonal part of the diagonal portion of the local
1960            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1961            for the diagonal entry and set a value even if it is zero.
1962 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1963            submatrix (same for all local rows).
1964 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1965            off-diagonal portion of the local submatrix that is right of the diagonal
1966            (possibly different for each block row) or NULL.
1967 
1968 
1969    Options Database Keys:
1970 +   -mat_no_unroll - uses code that does not unroll the loops in the
1971                      block calculations (much slower)
1972 -   -mat_block_size - size of the blocks to use
1973 
1974    Notes:
1975 
1976    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1977    then it must be used on all processors that share the object for that argument.
1978 
1979    If the *_nnz parameter is given then the *_nz parameter is ignored
1980 
1981    Storage Information:
1982    For a square global matrix we define each processor's diagonal portion
1983    to be its local rows and the corresponding columns (a square submatrix);
1984    each processor's off-diagonal portion encompasses the remainder of the
1985    local matrix (a rectangular submatrix).
1986 
1987    The user can specify preallocated storage for the diagonal part of
1988    the local submatrix with either d_nz or d_nnz (not both).  Set
1989    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1990    memory allocation.  Likewise, specify preallocated storage for the
1991    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1992 
1993    You can call MatGetInfo() to get information on how effective the preallocation was;
1994    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1995    You can also run with the option -info and look for messages with the string
1996    malloc in them to see if additional memory allocation was needed.
1997 
1998    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1999    the figure below we depict these three local rows and all columns (0-11).
2000 
2001 .vb
2002            0 1 2 3 4 5 6 7 8 9 10 11
2003           --------------------------
2004    row 3  |. . . d d d o o o o  o  o
2005    row 4  |. . . d d d o o o o  o  o
2006    row 5  |. . . d d d o o o o  o  o
2007           --------------------------
2008 .ve
2009 
2010    Thus, any entries in the d locations are stored in the d (diagonal)
2011    submatrix, and any entries in the o locations are stored in the
2012    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2013    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2014 
2015    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2016    plus the diagonal part of the d matrix,
2017    and o_nz should indicate the number of block nonzeros per row in the o matrix
2018 
2019    In general, for PDE problems in which most nonzeros are near the diagonal,
2020    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2021    or you will get TERRIBLE performance; see the users' manual chapter on
2022    matrices.
2023 
2024    Level: intermediate
2025 
2026 .keywords: matrix, block, aij, compressed row, sparse, parallel
2027 
2028 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2029 @*/
2030 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2031 {
2032   PetscErrorCode ierr;
2033 
2034   PetscFunctionBegin;
2035   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2036   PetscValidType(B,1);
2037   PetscValidLogicalCollectiveInt(B,bs,2);
2038   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
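
/*
   Example (an illustrative sketch): exact preallocation with per-block-row counts.
   The arrays are indexed by local block row, and d_nnz[] counts only the blocks on
   or to the right of the block diagonal; m, bs and the counts used here are
   assumptions for illustration.

     PetscInt i,mbs = m/bs,*d_nnz,*o_nnz;
     ierr = PetscMalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
     for (i=0; i<mbs; i++) {d_nnz[i] = 3; o_nnz[i] = 1;}   // application-specific counts
     ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
*/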
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatCreateSBAIJ"
2044 /*@C
2045    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2046    (block compressed row).  For good matrix assembly performance
2047    the user should preallocate the matrix storage by setting the parameters
2048    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2049    performance can be increased by more than a factor of 50.
2050 
2051    Collective on MPI_Comm
2052 
2053    Input Parameters:
2054 +  comm - MPI communicator
2055 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2056           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2057 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2058            This value should be the same as the local size used in creating the
2059            y vector for the matrix-vector product y = Ax.
2060 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2061            This value should be the same as the local size used in creating the
2062            x vector for the matrix-vector product y = Ax.
2063 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2064 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2065 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2066            submatrix  (same for all local rows)
2067 .  d_nnz - array containing the number of block nonzeros in the various block rows
2068            in the upper triangular portion of the diagonal portion of the local
2069            submatrix (possibly different for each block row) or NULL.
2070            If you plan to factor the matrix you must leave room for the diagonal entry and
2071            set its value even if it is zero.
2072 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2073            submatrix (same for all local rows).
2074 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2075            off-diagonal portion of the local submatrix (possibly different for
2076            each block row) or NULL.
2077 
2078    Output Parameter:
2079 .  A - the matrix
2080 
2081    Options Database Keys:
2082 +   -mat_no_unroll - uses code that does not unroll the loops in the
2083                      block calculations (much slower)
2084 .   -mat_block_size - size of the blocks to use
2085 -   -mat_mpi - use the parallel matrix data structures even on one processor
2086                (defaults to using SeqSBAIJ format on one processor)
2087 
2088    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2089    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2090    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2091 
2092    Notes:
2093    The number of rows and columns must be divisible by the blocksize.
2094    This matrix type does not support complex Hermitian operations.
2095 
2096    The user MUST specify either the local or global matrix dimensions
2097    (possibly both).
2098 
2099    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2100    then it must be used on all processors that share the object for that argument.
2101 
2102    If the *_nnz parameter is given then the *_nz parameter is ignored
2103 
2104    Storage Information:
2105    For a square global matrix we define each processor's diagonal portion
2106    to be its local rows and the corresponding columns (a square submatrix);
2107    each processor's off-diagonal portion encompasses the remainder of the
2108    local matrix (a rectangular submatrix).
2109 
2110    The user can specify preallocated storage for the diagonal part of
2111    the local submatrix with either d_nz or d_nnz (not both).  Set
2112    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2113    memory allocation.  Likewise, specify preallocated storage for the
2114    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2115 
2116    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2117    the figure below we depict these three local rows and all columns (0-11).
2118 
2119 .vb
2120            0 1 2 3 4 5 6 7 8 9 10 11
2121           --------------------------
2122    row 3  |. . . d d d o o o o  o  o
2123    row 4  |. . . d d d o o o o  o  o
2124    row 5  |. . . d d d o o o o  o  o
2125           --------------------------
2126 .ve
2127 
2128    Thus, any entries in the d locations are stored in the d (diagonal)
2129    submatrix, and any entries in the o locations are stored in the
2130    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2131    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2132 
2133    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2134    plus the diagonal part of the d matrix,
2135    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2136    In general, for PDE problems in which most nonzeros are near the diagonal,
2137    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2138    or you will get TERRIBLE performance; see the users' manual chapter on
2139    matrices.
2140 
2141    Level: intermediate
2142 
2143 .keywords: matrix, block, aij, compressed row, sparse, parallel
2144 
2145 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2146 @*/
2147 
2148 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2149 {
2150   PetscErrorCode ierr;
2151   PetscMPIInt    size;
2152 
2153   PetscFunctionBegin;
2154   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2155   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2156   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2157   if (size > 1) {
2158     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2159     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2160   } else {
2161     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2162     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
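
/*
   Example (an illustrative sketch): the 12x12, bs=1 scenario pictured in the manpage
   above; at most 3 nonzeros per row fall in the diagonal portion (counting only the
   diagonal and the entries to its right) and at most 6 in the off-diagonal portion.

     Mat A;
     ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,1,PETSC_DECIDE,PETSC_DECIDE,12,12,3,NULL,6,NULL,&A);CHKERRQ(ierr);
*/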
2166 
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2170 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2171 {
2172   Mat            mat;
2173   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2174   PetscErrorCode ierr;
2175   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2176   PetscScalar    *array;
2177 
2178   PetscFunctionBegin;
2179   *newmat = 0;
2180 
2181   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2182   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2183   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2184   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2185   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2186   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2187 
2188   mat->factortype   = matin->factortype;
2189   mat->preallocated = PETSC_TRUE;
2190   mat->assembled    = PETSC_TRUE;
2191   mat->insertmode   = NOT_SET_VALUES;
2192 
2193   a      = (Mat_MPISBAIJ*)mat->data;
2194   a->bs2 = oldmat->bs2;
2195   a->mbs = oldmat->mbs;
2196   a->nbs = oldmat->nbs;
2197   a->Mbs = oldmat->Mbs;
2198   a->Nbs = oldmat->Nbs;
2199 
2200 
2201   a->size         = oldmat->size;
2202   a->rank         = oldmat->rank;
2203   a->donotstash   = oldmat->donotstash;
2204   a->roworiented  = oldmat->roworiented;
2205   a->rowindices   = 0;
2206   a->rowvalues    = 0;
2207   a->getrowactive = PETSC_FALSE;
2208   a->barray       = 0;
2209   a->rstartbs     = oldmat->rstartbs;
2210   a->rendbs       = oldmat->rendbs;
2211   a->cstartbs     = oldmat->cstartbs;
2212   a->cendbs       = oldmat->cendbs;
2213 
2214   /* hash table stuff */
2215   a->ht           = 0;
2216   a->hd           = 0;
2217   a->ht_size      = 0;
2218   a->ht_flag      = oldmat->ht_flag;
2219   a->ht_fact      = oldmat->ht_fact;
2220   a->ht_total_ct  = 0;
2221   a->ht_insert_ct = 0;
2222 
2223   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2224   if (oldmat->colmap) {
2225 #if defined(PETSC_USE_CTABLE)
2226     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2227 #else
2228     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2229     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2230     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2231 #endif
2232   } else a->colmap = 0;
2233 
2234   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2235     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2236     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2237     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2238   } else a->garray = 0;
2239 
2240   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2241   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2242   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2243   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2244   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2245 
2246   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2247   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2248   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2249   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2250 
2251   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2252   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2253   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2254   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2255   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2256   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2257   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2258   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2259   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2260   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2261   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2262   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2263   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2264 
2265   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2266   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2267   a->sMvctx = oldmat->sMvctx;
2268   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2269 
2270   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2271   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2272   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2273   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2274   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2275   *newmat = mat;
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #undef __FUNCT__
2280 #define __FUNCT__ "MatLoad_MPISBAIJ"
2281 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2282 {
2283   PetscErrorCode ierr;
2284   PetscInt       i,nz,j,rstart,rend;
2285   PetscScalar    *vals,*buf;
2286   MPI_Comm       comm;
2287   MPI_Status     status;
2288   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2289   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2290   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2291   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2292   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2293   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2294   int            fd;
2295 
2296   PetscFunctionBegin;
2297   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2298   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2299   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2300   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2301   if (bs < 0) bs = 1;
2302 
2303   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2304   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2305   if (!rank) {
2306     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2307     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2308     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2309     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2310   }
2311 
2312   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2313 
2314   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2315   M    = header[1];
2316   N    = header[2];
2317 
2318   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2319   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2320   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2321 
2322   /* If global sizes are set, check if they are consistent with that given in the file */
2323   if (sizesset) {
2324     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2325   }
2326   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
2327   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2328 
2329   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2330 
2331   /*
2332      This code adds extra rows to make sure the number of rows is
2333      divisible by the blocksize
2334   */
2335   Mbs        = M/bs;
2336   extra_rows = bs - M + bs*(Mbs);
2337   if (extra_rows == bs) extra_rows = 0;
2338   else                  Mbs++;
2339   if (extra_rows && !rank) {
2340     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2341   }
2342 
2343   /* determine ownership of all rows */
2344   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2345     mbs = Mbs/size + ((Mbs % size) > rank);
2346     m   = mbs*bs;
2347   } else { /* User Set */
2348     m   = newmat->rmap->n;
2349     mbs = m/bs;
2350   }
2351   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2352   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2353   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2354   rowners[0] = 0;
2355   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2356   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2357   rstart = rowners[rank];
2358   rend   = rowners[rank+1];
2359 
2360   /* distribute row lengths to all processors */
2361   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2362   if (!rank) {
2363     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2364     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2365     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2366     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2367     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2368     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2369     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2370   } else {
2371     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2372   }
2373 
2374   if (!rank) {   /* procs[0] */
2375     /* calculate the number of nonzeros on each processor */
2376     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2377     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2378     for (i=0; i<size; i++) {
2379       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2380         procsnz[i] += rowlengths[j];
2381       }
2382     }
2383     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2384 
2385     /* determine max buffer needed and allocate it */
2386     maxnz = 0;
2387     for (i=0; i<size; i++) {
2388       maxnz = PetscMax(maxnz,procsnz[i]);
2389     }
2390     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2391 
2392     /* read in my part of the matrix column indices  */
2393     nz     = procsnz[0];
2394     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2395     mycols = ibuf;
2396     if (size == 1) nz -= extra_rows;
2397     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2398     if (size == 1) {
2399       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2400     }
2401 
2402     /* read in everyone else's part (except the last proc) and ship it off */
2403     for (i=1; i<size-1; i++) {
2404       nz   = procsnz[i];
2405       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2406       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2407     }
2408     /* read in the stuff for the last proc */
2409     if (size != 1) {
2410       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2411       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2412       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2413       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2414     }
2415     ierr = PetscFree(cols);CHKERRQ(ierr);
2416   } else {  /* procs[i], i>0 */
2417     /* determine buffer space needed for message */
2418     nz = 0;
2419     for (i=0; i<m; i++) nz += locrowlens[i];
2420     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2421     mycols = ibuf;
2422     /* receive message of column indices*/
2423     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2424     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2425     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2426   }
2427 
2428   /* loop over local rows, determining number of off diagonal entries */
2429   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2430   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2431   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2432   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2433   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2434   rowcount = 0;
2435   nzcount  = 0;
2436   for (i=0; i<mbs; i++) {
2437     dcount  = 0;
2438     odcount = 0;
2439     for (j=0; j<bs; j++) {
2440       kmax = locrowlens[rowcount];
2441       for (k=0; k<kmax; k++) {
2442         tmp = mycols[nzcount++]/bs; /* block col. index */
2443         if (!mask[tmp]) {
2444           mask[tmp] = 1;
2445           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2446           else masked1[dcount++] = tmp; /* entry in diag portion */
2447         }
2448       }
2449       rowcount++;
2450     }
2451 
2452     dlens[i]  = dcount;  /* d_nzz[i] */
2453     odlens[i] = odcount; /* o_nzz[i] */
2454 
2455     /* zero out the mask elements we set */
2456     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2457     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2458   }
2459   if (!sizesset) {
2460     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2461   }
2462   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2463   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2464 
2465   if (!rank) {
2466     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2467     /* read in my part of the matrix numerical values  */
2468     nz     = procsnz[0];
2469     vals   = buf;
2470     mycols = ibuf;
2471     if (size == 1) nz -= extra_rows;
2472     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2473     if (size == 1) {
2474       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2475     }
2476 
2477     /* insert into matrix */
2478     jj = rstart*bs;
2479     for (i=0; i<m; i++) {
2480       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2481       mycols += locrowlens[i];
2482       vals   += locrowlens[i];
2483       jj++;
2484     }
2485 
2486     /* read in the other processors' parts (except the last one) and ship them out */
2487     for (i=1; i<size-1; i++) {
2488       nz   = procsnz[i];
2489       vals = buf;
2490       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2491       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2492     }
2493     /* the last proc */
2494     if (size != 1) {
2495       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2496       vals = buf;
2497       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2498       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2499       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2500     }
2501     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2502 
2503   } else {
2504     /* receive numeric values */
2505     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2506 
2507     /* receive message of values*/
2508     vals   = buf;
2509     mycols = ibuf;
2510     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2511     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2512     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2513 
2514     /* insert into matrix */
2515     jj = rstart*bs;
2516     for (i=0; i<m; i++) {
2517       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2518       mycols += locrowlens[i];
2519       vals   += locrowlens[i];
2520       jj++;
2521     }
2522   }
2523 
2524   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2525   ierr = PetscFree(buf);CHKERRQ(ierr);
2526   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2527   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2528   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2529   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2530   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2531   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
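
/*
   Example (an illustrative sketch): driving the loader above from user code; the
   file name is an assumption for illustration and the blocksize can be supplied
   with -matload_block_size.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/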

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Not Collective on Mat, each process can have a different hash factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set with the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/

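/*
   Usage sketch (illustrative): enabling the hash-table assembly path; the factor
   0.15 below is an arbitrary example value, not a recommended default.

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);

   or, from the command line,

     -mat_use_hash_table 0.15
*/
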
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Returning the column indices (idx) is not supported for MPISBAIJ; send email to petsc-maint@mcs.anl.gov");
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

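  /* Only the upper triangle is stored, so each entry of the off-diagonal block B
     contributes to two rows: the locally owned row (folded into va[] below) and the
     symmetric row owned by a higher rank (accumulated in work[] and sent to its
     owner afterwards) */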
  /* row_max for B */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;             /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;           /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send the accumulated maxima to their owning processes */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values; one message arrives from every lower rank, in any order */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
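
/*
   Usage sketch (illustrative): computing the row maxima of an assembled MPISBAIJ
   matrix A.  idx must be NULL here, since returning column indices is not
   supported by the routine above.

     Vec rmax;

     ierr = MatCreateVecs(A,NULL,&rmax);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,NULL);CHKERRQ(ierr);
     ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/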

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = B^T*xx (negated below together with slvec0a) */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = slvec1a + B*slvec1b (net effect: bb minus the off-diagonal contributions) */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

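    /* Eisenstat's trick: a backward sweep produces an intermediate iterate, slvec1a
       is then set to bb + ((omega-2)/omega)*D*xx (the scale factor above), and a
       final forward sweep completes the application of the SSOR-preconditioned
       operator without forming a second full residual */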
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* compute slvec1a = bb + scale*D*xx, matching the hand-rolled branch below;
         the result must land in slvec1a (not bb1) for the off-diagonal multiply below */
      ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
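
/*
   Usage sketch (illustrative): the routine above is normally reached through the
   SOR preconditioner rather than called directly, for example

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or, equivalently, with the options -pc_type sor -pc_sor_local_symmetric.
*/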

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
         the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i array holds the offsets into the local j array
     at which each local row starts.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
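
/*
   Usage sketch (illustrative): assembling a small MPISBAIJ matrix with bs = 1 and
   two rows per process.  This sketch assumes each process passes only the upper
   triangular part of its rows in CSR form; ii holds the row offsets into jj, so
   row 0 below has two entries and row 1 has one.  All names and values are example
   assumptions.

     Mat         A;
     PetscMPIInt rank;
     PetscInt    r0,ii[3],jj[3];
     PetscScalar aa[3] = {2.0,-1.0,2.0};

     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     r0    = 2*rank;
     ii[0] = 0; ii[1] = 2; ii[2] = 3;
     jj[0] = r0; jj[1] = r0+1; jj[2] = r0+1;
     ierr  = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);
*/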
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row, with only the upper triangular portion stored).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
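
/*
   Usage sketch (illustrative): preallocating and filling an MPISBAIJ matrix from
   CSR arrays.  The names mlocal, ii, jj, and vv are example assumptions and would
   describe the upper triangular part of the locally owned rows.

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,1,ii,jj,vv);CHKERRQ(ierr);
*/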

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
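/* Builds a parallel MPISBAIJ matrix by stacking the rows of the sequential SBAIJ
   matrix inmat from each process in rank order; reached via MatCreateMPIMatConcatenateSeqMat() */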
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt     *dnz,*onz,sum,mbs,Nbs;
    PetscInt     *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);

    ierr    = MPI_Scan(&mbs,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz  = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}