xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 110bb6e1beef8eff0dbcef7b490bbe0d0a77c8ac)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scalable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some Variables required in the macro */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined(PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col  =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B    = baij->B;
249               b    = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba   =b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray     = baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) stepval = (n-1)*bs;
296   else stepval = (m-1)*bs;
297 
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If NumCol = 1 then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315             value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend) {
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         } else if (in[j] < 0) continue;
328 #if defined(PETSC_USE_DEBUG)
329         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
330 #endif
331         else {
332           if (mat->was_assembled) {
333             if (!baij->colmap) {
334               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
335             }
336 
337 #if defined(PETSC_USE_DEBUG)
338 #if defined(PETSC_USE_CTABLE)
339             { PetscInt data;
340               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
341               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
342             }
343 #else
344             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345 #endif
346 #endif
347 #if defined(PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
349             col  = (col - 1)/bs;
350 #else
351             col = (baij->colmap[in[j]] - 1)/bs;
352 #endif
353             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
354               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355               col  =  in[j];
356             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
357           } else col = in[j];
358           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
359         }
360       }
361     } else {
362       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
363       if (!baij->donotstash) {
364         if (roworiented) {
365           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
366         } else {
367           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
375 #define HASH_KEY 0.6180339887
376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
382 {
383   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
384   PetscBool      roworiented = baij->roworiented;
385   PetscErrorCode ierr;
386   PetscInt       i,j,row,col;
387   PetscInt       rstart_orig=mat->rmap->rstart;
388   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
389   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
390   PetscReal      tmp;
391   MatScalar      **HD = baij->hd,value;
392 #if defined(PETSC_USE_DEBUG)
393   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
394 #endif
395 
396   PetscFunctionBegin;
397   for (i=0; i<m; i++) {
398 #if defined(PETSC_USE_DEBUG)
399     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
400     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
401 #endif
402     row = im[i];
403     if (row >= rstart_orig && row < rend_orig) {
404       for (j=0; j<n; j++) {
405         col = in[j];
406         if (roworiented) value = v[i*n+j];
407         else             value = v[i+j*m];
408         /* Look up PetscInto the Hash Table */
409         key = (row/bs)*Nbs+(col/bs)+1;
410         h1  = HASH(size,key,tmp);
411 
412 
413         idx = h1;
414 #if defined(PETSC_USE_DEBUG)
415         insert_ct++;
416         total_ct++;
417         if (HT[idx] != key) {
418           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
419           if (idx == size) {
420             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
421             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
427           if (idx == size) {
428             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
429             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
430           }
431         }
432 #endif
433         /* A HASH table entry is found, so insert the values at the correct address */
434         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
435         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
436       }
437     } else if (!baij->donotstash) {
438       if (roworiented) {
439         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
440       } else {
441         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
442       }
443     }
444   }
445 #if defined(PETSC_USE_DEBUG)
446   baij->ht_total_ct  = total_ct;
447   baij->ht_insert_ct = insert_ct;
448 #endif
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
454 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
457   PetscBool         roworiented = baij->roworiented;
458   PetscErrorCode    ierr;
459   PetscInt          i,j,ii,jj,row,col;
460   PetscInt          rstart=baij->rstartbs;
461   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
462   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
463   PetscReal         tmp;
464   MatScalar         **HD = baij->hd,*baij_a;
465   const PetscScalar *v_t,*value;
466 #if defined(PETSC_USE_DEBUG)
467   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
468 #endif
469 
470   PetscFunctionBegin;
471   if (roworiented) stepval = (n-1)*bs;
472   else stepval = (m-1)*bs;
473 
474   for (i=0; i<m; i++) {
475 #if defined(PETSC_USE_DEBUG)
476     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
477     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
478 #endif
479     row = im[i];
480     v_t = v + i*nbs2;
481     if (row >= rstart && row < rend) {
482       for (j=0; j<n; j++) {
483         col = in[j];
484 
485         /* Look up into the Hash Table */
486         key = row*Nbs+col+1;
487         h1  = HASH(size,key,tmp);
488 
489         idx = h1;
490 #if defined(PETSC_USE_DEBUG)
491         total_ct++;
492         insert_ct++;
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #else
501         if (HT[idx] != key) {
502           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
503           if (idx == size) {
504             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
505             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
506           }
507         }
508 #endif
509         baij_a = HD[idx];
510         if (roworiented) {
511           /*value = v + i*(stepval+bs)*bs + j*bs;*/
512           /* value = v + (i*(stepval+bs)+j)*bs; */
513           value = v_t;
514           v_t  += bs;
515           if (addv == ADD_VALUES) {
516             for (ii=0; ii<bs; ii++,value+=stepval) {
517               for (jj=ii; jj<bs2; jj+=bs) {
518                 baij_a[jj] += *value++;
519               }
520             }
521           } else {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] = *value++;
525               }
526             }
527           }
528         } else {
529           value = v + j*(stepval+bs)*bs + i*bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
532               for (jj=0; jj<bs; jj++) {
533                 baij_a[jj] += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] = *value++;
540               }
541             }
542           }
543         }
544       }
545     } else {
546       if (!baij->donotstash) {
547         if (roworiented) {
548           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555 #if defined(PETSC_USE_DEBUG)
556   baij->ht_total_ct  = total_ct;
557   baij->ht_insert_ct = insert_ct;
558 #endif
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatGetValues_MPIBAIJ"
564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
565 {
566   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
567   PetscErrorCode ierr;
568   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
569   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
574     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
575     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
576       row = idxm[i] - bsrstart;
577       for (j=0; j<n; j++) {
578         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
579         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
580         if (idxn[j] >= bscstart && idxn[j] < bscend) {
581           col  = idxn[j] - bscstart;
582           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
583         } else {
584           if (!baij->colmap) {
585             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
586           }
587 #if defined(PETSC_USE_CTABLE)
588           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
589           data--;
590 #else
591           data = baij->colmap[idxn[j]/bs]-1;
592 #endif
593           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
594           else {
595             col  = data + idxn[j]%bs;
596             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
597           }
598         }
599       }
600     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatNorm_MPIBAIJ"
607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
608 {
609   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
610   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
611   PetscErrorCode ierr;
612   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
613   PetscReal      sum = 0.0;
614   MatScalar      *v;
615 
616   PetscFunctionBegin;
617   if (baij->size == 1) {
618     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
619   } else {
620     if (type == NORM_FROBENIUS) {
621       v  = amat->a;
622       nz = amat->nz*bs2;
623       for (i=0; i<nz; i++) {
624         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
625       }
626       v  = bmat->a;
627       nz = bmat->nz*bs2;
628       for (i=0; i<nz; i++) {
629         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
630       }
631       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632       *nrm = PetscSqrtReal(*nrm);
633     } else if (type == NORM_1) { /* max column sum */
634       PetscReal *tmp,*tmp2;
635       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
636       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
637       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
638       v    = amat->a; jj = amat->j;
639       for (i=0; i<amat->nz; i++) {
640         for (j=0; j<bs; j++) {
641           col = bs*(cstart + *jj) + j; /* column index */
642           for (row=0; row<bs; row++) {
643             tmp[col] += PetscAbsScalar(*v);  v++;
644           }
645         }
646         jj++;
647       }
648       v = bmat->a; jj = bmat->j;
649       for (i=0; i<bmat->nz; i++) {
650         for (j=0; j<bs; j++) {
651           col = bs*garray[*jj] + j;
652           for (row=0; row<bs; row++) {
653             tmp[col] += PetscAbsScalar(*v); v++;
654           }
655         }
656         jj++;
657       }
658       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
659       *nrm = 0.0;
660       for (j=0; j<mat->cmap->N; j++) {
661         if (tmp2[j] > *nrm) *nrm = tmp2[j];
662       }
663       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
664     } else if (type == NORM_INFINITY) { /* max row sum */
665       PetscReal *sums;
666       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
667       sum  = 0.0;
668       for (j=0; j<amat->mbs; j++) {
669         for (row=0; row<bs; row++) sums[row] = 0.0;
670         v  = amat->a + bs2*amat->i[j];
671         nz = amat->i[j+1]-amat->i[j];
672         for (i=0; i<nz; i++) {
673           for (col=0; col<bs; col++) {
674             for (row=0; row<bs; row++) {
675               sums[row] += PetscAbsScalar(*v); v++;
676             }
677           }
678         }
679         v  = bmat->a + bs2*bmat->i[j];
680         nz = bmat->i[j+1]-bmat->i[j];
681         for (i=0; i<nz; i++) {
682           for (col=0; col<bs; col++) {
683             for (row=0; row<bs; row++) {
684               sums[row] += PetscAbsScalar(*v); v++;
685             }
686           }
687         }
688         for (row=0; row<bs; row++) {
689           if (sums[row] > sum) sum = sums[row];
690         }
691       }
692       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
693       ierr = PetscFree(sums);CHKERRQ(ierr);
694     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
695   }
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   Creates the hash table, and sets the table
701   This table is created only once.
702   If new entried need to be added to the matrix
703   then the hash table has to be destroyed and
704   recreated.
705 */
706 #undef __FUNCT__
707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
709 {
710   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
711   Mat            A     = baij->A,B=baij->B;
712   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
713   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
714   PetscErrorCode ierr;
715   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
716   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
717   PetscInt       *HT,key;
718   MatScalar      **HD;
719   PetscReal      tmp;
720 #if defined(PETSC_USE_INFO)
721   PetscInt ct=0,max=0;
722 #endif
723 
724   PetscFunctionBegin;
725   if (baij->ht) PetscFunctionReturn(0);
726 
727   baij->ht_size = (PetscInt)(factor*nz);
728   ht_size       = baij->ht_size;
729 
730   /* Allocate Memory for Hash Table */
731   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
732   HD   = baij->hd;
733   HT   = baij->ht;
734 
735   /* Loop Over A */
736   for (i=0; i<a->mbs; i++) {
737     for (j=ai[i]; j<ai[i+1]; j++) {
738       row = i+rstart;
739       col = aj[j]+cstart;
740 
741       key = row*Nbs + col + 1;
742       h1  = HASH(ht_size,key,tmp);
743       for (k=0; k<ht_size; k++) {
744         if (!HT[(h1+k)%ht_size]) {
745           HT[(h1+k)%ht_size] = key;
746           HD[(h1+k)%ht_size] = a->a + j*bs2;
747           break;
748 #if defined(PETSC_USE_INFO)
749         } else {
750           ct++;
751 #endif
752         }
753       }
754 #if defined(PETSC_USE_INFO)
755       if (k> max) max = k;
756 #endif
757     }
758   }
759   /* Loop Over B */
760   for (i=0; i<b->mbs; i++) {
761     for (j=bi[i]; j<bi[i+1]; j++) {
762       row = i+rstart;
763       col = garray[bj[j]];
764       key = row*Nbs + col + 1;
765       h1  = HASH(ht_size,key,tmp);
766       for (k=0; k<ht_size; k++) {
767         if (!HT[(h1+k)%ht_size]) {
768           HT[(h1+k)%ht_size] = key;
769           HD[(h1+k)%ht_size] = b->a + j*bs2;
770           break;
771 #if defined(PETSC_USE_INFO)
772         } else {
773           ct++;
774 #endif
775         }
776       }
777 #if defined(PETSC_USE_INFO)
778       if (k> max) max = k;
779 #endif
780     }
781   }
782 
783   /* Print Summary */
784 #if defined(PETSC_USE_INFO)
785   for (i=0,j=0; i<ht_size; i++) {
786     if (HT[i]) j++;
787   }
788   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
789 #endif
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
795 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800   InsertMode     addv;
801 
802   PetscFunctionBegin;
803   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
804 
805   /* make sure all processors are either in INSERTMODE or ADDMODE */
806   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
808   mat->insertmode = addv; /* in case this processor had no cache */
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
812   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
813   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
821 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
822 {
823   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
824   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
825   PetscErrorCode ierr;
826   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
827   PetscInt       *row,*col;
828   PetscBool      r1,r2,r3,other_disassembled;
829   MatScalar      *val;
830   InsertMode     addv = mat->insertmode;
831   PetscMPIInt    n;
832 
833   PetscFunctionBegin;
834   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
835   if (!baij->donotstash && !mat->nooffprocentries) {
836     while (1) {
837       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
838       if (!flg) break;
839 
840       for (i=0; i<n;) {
841         /* Now identify the consecutive vals belonging to the same row */
842         for (j=i,rstart=row[j]; j<n; j++) {
843           if (row[j] != rstart) break;
844         }
845         if (j < n) ncols = j-i;
846         else       ncols = n-i;
847         /* Now assemble all these values with a single function call */
848         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
849         i    = j;
850       }
851     }
852     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
853     /* Now process the block-stash. Since the values are stashed column-oriented,
854        set the roworiented flag to column oriented, and after MatSetValues()
855        restore the original flags */
856     r1 = baij->roworiented;
857     r2 = a->roworiented;
858     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
859 
860     baij->roworiented = PETSC_FALSE;
861     a->roworiented    = PETSC_FALSE;
862 
863     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
864     while (1) {
865       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
866       if (!flg) break;
867 
868       for (i=0; i<n;) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for (j=i,rstart=row[j]; j<n; j++) {
871           if (row[j] != rstart) break;
872         }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
876         i    = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
880 
881     baij->roworiented = r1;
882     a->roworiented    = r2;
883 
884     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
885   }
886 
887   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
888   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
889 
890   /* determine if any processor has disassembled, if so we must
891      also disassemble ourselfs, in order that we may reassemble. */
892   /*
893      if nonzero structure of submatrix B cannot change then we know that
894      no processor disassembled thus we can skip this stuff
895   */
896   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
897     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
898     if (mat->was_assembled && !other_disassembled) {
899       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
900     }
901   }
902 
903   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
904     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
905   }
906   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
907   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
908 
909 #if defined(PETSC_USE_INFO)
910   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
911     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
912 
913     baij->ht_total_ct  = 0;
914     baij->ht_insert_ct = 0;
915   }
916 #endif
917   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
918     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
919 
920     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
921     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
922   }
923 
924   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
925 
926   baij->rowvalues = 0;
927 
928   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
929   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
930     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
931     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
937 #include <petscdraw.h>
938 #undef __FUNCT__
939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
941 {
942   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
943   PetscErrorCode    ierr;
944   PetscMPIInt       rank = baij->rank;
945   PetscInt          bs   = mat->rmap->bs;
946   PetscBool         iascii,isdraw;
947   PetscViewer       sviewer;
948   PetscViewerFormat format;
949 
950   PetscFunctionBegin;
951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
953   if (iascii) {
954     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
955     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
956       MatInfo info;
957       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
958       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
961                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
964       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
966       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
967       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
968       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
969       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     } else if (format == PETSC_VIEWER_ASCII_INFO) {
972       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
973       PetscFunctionReturn(0);
974     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
975       PetscFunctionReturn(0);
976     }
977   }
978 
979   if (isdraw) {
980     PetscDraw draw;
981     PetscBool isnull;
982     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
983     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
984   }
985 
986   {
987     /* assemble the entire matrix onto first processor. */
988     Mat         A;
989     Mat_SeqBAIJ *Aloc;
990     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
991     MatScalar   *a;
992 
993     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
994     /* Perhaps this should be the type of mat? */
995     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
998     } else {
999       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1000     }
1001     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1002     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1003     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1004     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1005 
1006     /* copy over the A part */
1007     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1008     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1009     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1010 
1011     for (i=0; i<mbs; i++) {
1012       rvals[0] = bs*(baij->rstartbs + i);
1013       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1014       for (j=ai[i]; j<ai[i+1]; j++) {
1015         col = (baij->cstartbs+aj[j])*bs;
1016         for (k=0; k<bs; k++) {
1017           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1018           col++; a += bs;
1019         }
1020       }
1021     }
1022     /* copy over the B part */
1023     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1024     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = baij->garray[aj[j]]*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     ierr = PetscFree(rvals);CHKERRQ(ierr);
1037     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     /*
1040        Everyone has to call to draw the matrix since the graphics waits are
1041        synchronized across all processors that share the PetscDraw object
1042     */
1043     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1044     if (!rank) {
1045       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1046     }
1047     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1048     ierr = MatDestroy(&A);CHKERRQ(ierr);
1049   }
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1055 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1056 {
1057   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1058   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1059   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1060   PetscErrorCode ierr;
1061   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1062   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1063   int            fd;
1064   PetscScalar    *column_values;
1065   FILE           *file;
1066   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1067   PetscInt       message_count,flowcontrolcount;
1068 
1069   PetscFunctionBegin;
1070   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1071   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1072   nz   = bs2*(A->nz + B->nz);
1073   rlen = mat->rmap->n;
1074   if (!rank) {
1075     header[0] = MAT_FILE_CLASSID;
1076     header[1] = mat->rmap->N;
1077     header[2] = mat->cmap->N;
1078 
1079     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1080     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1081     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1082     /* get largest number of rows any processor has */
1083     range = mat->rmap->range;
1084     for (i=1; i<size; i++) {
1085       rlen = PetscMax(rlen,range[i+1] - range[i]);
1086     }
1087   } else {
1088     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1089   }
1090 
1091   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1092   /* compute lengths of each row  */
1093   for (i=0; i<a->mbs; i++) {
1094     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1095   }
1096   /* store the row lengths to the file */
1097   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1098   if (!rank) {
1099     MPI_Status status;
1100     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1101     rlen = (range[1] - range[0])/bs;
1102     for (i=0; i<rlen; i++) {
1103       for (j=0; j<bs; j++) {
1104         row_lens[i*bs+j] = bs*crow_lens[i];
1105       }
1106     }
1107     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1108     for (i=1; i<size; i++) {
1109       rlen = (range[i+1] - range[i])/bs;
1110       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1111       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1112       for (k=0; k<rlen; k++) {
1113         for (j=0; j<bs; j++) {
1114           row_lens[k*bs+j] = bs*crow_lens[k];
1115         }
1116       }
1117       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1118     }
1119     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1120     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1121   } else {
1122     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1123     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1124     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1125   }
1126   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1127 
1128   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1129      information needed to make it for each row from a block row. This does require more communication but still not more than
1130      the communication needed for the nonzero values  */
1131   nzmax = nz; /*  space a largest processor needs */
1132   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1133   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1134   cnt   = 0;
1135   for (i=0; i<a->mbs; i++) {
1136     pcnt = cnt;
1137     for (j=B->i[i]; j<B->i[i+1]; j++) {
1138       if ((col = garray[B->j[j]]) > cstart) break;
1139       for (l=0; l<bs; l++) {
1140         column_indices[cnt++] = bs*col+l;
1141       }
1142     }
1143     for (k=A->i[i]; k<A->i[i+1]; k++) {
1144       for (l=0; l<bs; l++) {
1145         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1146       }
1147     }
1148     for (; j<B->i[i+1]; j++) {
1149       for (l=0; l<bs; l++) {
1150         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1151       }
1152     }
1153     len = cnt - pcnt;
1154     for (k=1; k<bs; k++) {
1155       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1156       cnt += len;
1157     }
1158   }
1159   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1160 
1161   /* store the columns to the file */
1162   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1163   if (!rank) {
1164     MPI_Status status;
1165     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166     for (i=1; i<size; i++) {
1167       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1168       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1169       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1170       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     }
1172     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1173   } else {
1174     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1175     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1176     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1177     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1178   }
1179   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1180 
1181   /* load up the numerical values */
1182   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1183   cnt  = 0;
1184   for (i=0; i<a->mbs; i++) {
1185     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1186     for (j=B->i[i]; j<B->i[i+1]; j++) {
1187       if (garray[B->j[j]] > cstart) break;
1188       for (l=0; l<bs; l++) {
1189         for (ll=0; ll<bs; ll++) {
1190           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1191         }
1192       }
1193       cnt += bs;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) {
1196       for (l=0; l<bs; l++) {
1197         for (ll=0; ll<bs; ll++) {
1198           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1199         }
1200       }
1201       cnt += bs;
1202     }
1203     for (; j<B->i[i+1]; j++) {
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     cnt += (bs-1)*rlen;
1212   }
1213   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1214 
1215   /* store the column values to the file */
1216   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1217   if (!rank) {
1218     MPI_Status status;
1219     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1223       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1224       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1225     }
1226     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1227   } else {
1228     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1232   }
1233   ierr = PetscFree(column_values);CHKERRQ(ierr);
1234 
1235   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1236   if (file) {
1237     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatView_MPIBAIJ"
1244 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1245 {
1246   PetscErrorCode ierr;
1247   PetscBool      iascii,isdraw,issocket,isbinary;
1248 
1249   PetscFunctionBegin;
1250   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1254   if (iascii || isdraw || issocket) {
1255     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1256   } else if (isbinary) {
1257     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1264 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1265 {
1266   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270 #if defined(PETSC_USE_LOG)
1271   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1272 #endif
1273   ierr = MatDestroy_Redundant(&mat->redundant);CHKERRQ(ierr);
1274   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1275   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1277   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1278 #if defined(PETSC_USE_CTABLE)
1279   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1280 #else
1281   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1282 #endif
1283   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1284   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1285   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1286   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1287   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1290   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMult_MPIBAIJ"
1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308 {
1309   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       nt;
1312 
1313   PetscFunctionBegin;
1314   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1315   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1317   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1320   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1343 {
1344   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346   PetscBool      merged;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1350   /* do nondiagonal part */
1351   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1352   if (!merged) {
1353     /* send it on its way */
1354     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1355     /* do local part */
1356     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357     /* receive remote parts: note this assumes the values are not actually */
1358     /* inserted in yy until the next line */
1359     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1360   } else {
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* send it on its way */
1364     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365     /* values actually were received in the Begin() but we need to call this nop */
1366     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   /* do nondiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   /* send it on its way */
1382   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   /* do local part */
1384   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1385   /* receive remote parts: note this assumes the values are not actually */
1386   /* inserted in yy until the next line, which is true for my implementation*/
1387   /* but is not perhaps always true. */
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1405   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatScale_MPIBAIJ"
1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1418   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1425 {
1426   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1427   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1428   PetscErrorCode ierr;
1429   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1430   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1431   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1432 
1433   PetscFunctionBegin;
1434   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1435   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1436   mat->getrowactive = PETSC_TRUE;
1437 
1438   if (!mat->rowvalues && (idx || v)) {
1439     /*
1440         allocate enough space to hold information from the longest row.
1441     */
1442     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1443     PetscInt    max = 1,mbs = mat->mbs,tmp;
1444     for (i=0; i<mbs; i++) {
1445       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1446       if (max < tmp) max = tmp;
1447     }
1448     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1449   }
1450   lrow = row - brstart;
1451 
1452   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453   if (!v)   {pvA = 0; pvB = 0;}
1454   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1456   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1457   nztot = nzA + nzB;
1458 
1459   cmap = mat->garray;
1460   if (v  || idx) {
1461     if (nztot) {
1462       /* Sort by increasing column numbers, assuming A and B already sorted */
1463       PetscInt imark = -1;
1464       if (v) {
1465         *v = v_p = mat->rowvalues;
1466         for (i=0; i<nzB; i++) {
1467           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1468           else break;
1469         }
1470         imark = i;
1471         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1472         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1473       }
1474       if (idx) {
1475         *idx = idx_p = mat->rowindices;
1476         if (imark > -1) {
1477           for (i=0; i<imark; i++) {
1478             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479           }
1480         } else {
1481           for (i=0; i<nzB; i++) {
1482             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1483             else break;
1484           }
1485           imark = i;
1486         }
1487         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1488         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1489       }
1490     } else {
1491       if (idx) *idx = 0;
1492       if (v)   *v   = 0;
1493     }
1494   }
1495   *nz  = nztot;
1496   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1497   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1503 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1504 {
1505   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1506 
1507   PetscFunctionBegin;
1508   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1509   baij->getrowactive = PETSC_FALSE;
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1515 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1516 {
1517   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1518   PetscErrorCode ierr;
1519 
1520   PetscFunctionBegin;
1521   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1522   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1528 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1529 {
1530   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1531   Mat            A  = a->A,B = a->B;
1532   PetscErrorCode ierr;
1533   PetscReal      isend[5],irecv[5];
1534 
1535   PetscFunctionBegin;
1536   info->block_size = (PetscReal)matin->rmap->bs;
1537 
1538   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1539 
1540   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1541   isend[3] = info->memory;  isend[4] = info->mallocs;
1542 
1543   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1544 
1545   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1546   isend[3] += info->memory;  isend[4] += info->mallocs;
1547 
1548   if (flag == MAT_LOCAL) {
1549     info->nz_used      = isend[0];
1550     info->nz_allocated = isend[1];
1551     info->nz_unneeded  = isend[2];
1552     info->memory       = isend[3];
1553     info->mallocs      = isend[4];
1554   } else if (flag == MAT_GLOBAL_MAX) {
1555     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1556 
1557     info->nz_used      = irecv[0];
1558     info->nz_allocated = irecv[1];
1559     info->nz_unneeded  = irecv[2];
1560     info->memory       = irecv[3];
1561     info->mallocs      = irecv[4];
1562   } else if (flag == MAT_GLOBAL_SUM) {
1563     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1564 
1565     info->nz_used      = irecv[0];
1566     info->nz_allocated = irecv[1];
1567     info->nz_unneeded  = irecv[2];
1568     info->memory       = irecv[3];
1569     info->mallocs      = irecv[4];
1570   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1571   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1572   info->fill_ratio_needed = 0;
1573   info->factor_mallocs    = 0;
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1579 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1580 {
1581   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   switch (op) {
1586   case MAT_NEW_NONZERO_LOCATIONS:
1587   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1588   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1589   case MAT_KEEP_NONZERO_PATTERN:
1590   case MAT_NEW_NONZERO_LOCATION_ERR:
1591     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1592     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1593     break;
1594   case MAT_ROW_ORIENTED:
1595     a->roworiented = flg;
1596 
1597     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1598     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1599     break;
1600   case MAT_NEW_DIAGONALS:
1601     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1602     break;
1603   case MAT_IGNORE_OFF_PROC_ENTRIES:
1604     a->donotstash = flg;
1605     break;
1606   case MAT_USE_HASH_TABLE:
1607     a->ht_flag = flg;
1608     break;
1609   case MAT_SYMMETRIC:
1610   case MAT_STRUCTURALLY_SYMMETRIC:
1611   case MAT_HERMITIAN:
1612   case MAT_SYMMETRY_ETERNAL:
1613     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1614     break;
1615   default:
1616     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1617   }
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1623 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1624 {
1625   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1626   Mat_SeqBAIJ    *Aloc;
1627   Mat            B;
1628   PetscErrorCode ierr;
1629   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1630   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1631   MatScalar      *a;
1632 
1633   PetscFunctionBegin;
1634   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1635   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1636     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1637     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1638     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1639     /* Do not know preallocation information, but must set block size */
1640     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1641   } else {
1642     B = *matout;
1643   }
1644 
1645   /* copy over the A part */
1646   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1647   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1648   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1649 
1650   for (i=0; i<mbs; i++) {
1651     rvals[0] = bs*(baij->rstartbs + i);
1652     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1653     for (j=ai[i]; j<ai[i+1]; j++) {
1654       col = (baij->cstartbs+aj[j])*bs;
1655       for (k=0; k<bs; k++) {
1656         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1657 
1658         col++; a += bs;
1659       }
1660     }
1661   }
1662   /* copy over the B part */
1663   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1664   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1665   for (i=0; i<mbs; i++) {
1666     rvals[0] = bs*(baij->rstartbs + i);
1667     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1668     for (j=ai[i]; j<ai[i+1]; j++) {
1669       col = baij->garray[aj[j]]*bs;
1670       for (k=0; k<bs; k++) {
1671         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1672         col++;
1673         a += bs;
1674       }
1675     }
1676   }
1677   ierr = PetscFree(rvals);CHKERRQ(ierr);
1678   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680 
1681   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1682   else {
1683     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1690 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1691 {
1692   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1693   Mat            a     = baij->A,b = baij->B;
1694   PetscErrorCode ierr;
1695   PetscInt       s1,s2,s3;
1696 
1697   PetscFunctionBegin;
1698   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1699   if (rr) {
1700     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1701     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1702     /* Overlap communication with computation. */
1703     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1704   }
1705   if (ll) {
1706     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1707     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1708     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1709   }
1710   /* scale  the diagonal block */
1711   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1712 
1713   if (rr) {
1714     /* Do a scatter end and then right scale the off-diagonal block */
1715     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1717   }
1718   PetscFunctionReturn(0);
1719 }
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1723 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1724 {
1725   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1726   PetscInt      *owners = A->rmap->range;
1727   PetscInt       n      = A->rmap->n;
1728   PetscSF        sf;
1729   PetscInt      *lrows;
1730   PetscSFNode   *rrows;
1731   PetscInt       r, p = 0, len = 0;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   /* Create SF where leaves are input rows and roots are owned rows */
1736   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1737   for (r = 0; r < n; ++r) lrows[r] = -1;
1738   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
1739   for (r = 0; r < N; ++r) {
1740     const PetscInt idx   = rows[r];
1741     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1742     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1743       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1744     }
1745     if (A->nooffproczerorows) {
1746       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1747       lrows[len++] = idx - owners[p];
1748     } else {
1749       rrows[r].rank = p;
1750       rrows[r].index = rows[r] - owners[p];
1751     }
1752   }
1753   if (!A->nooffproczerorows) {
1754     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1755     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1756     /* Collect flags for rows to be zeroed */
1757     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1758     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1759     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1760     /* Compress and put in row numbers */
1761     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1762   }
1763   /* fix right hand side if needed */
1764   if (x && b) {
1765     const PetscScalar *xx;
1766     PetscScalar       *bb;
1767 
1768     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1769     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1770     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1771     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1772     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1773   }
1774 
1775   /* actually zap the local rows */
1776   /*
1777         Zero the required rows. If the "diagonal block" of the matrix
1778      is square and the user wishes to set the diagonal we use separate
1779      code so that MatSetValues() is not called for each diagonal allocating
1780      new memory, thus calling lots of mallocs and slowing things down.
1781 
1782   */
1783   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1784   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1785   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1786     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1787   } else if (diag != 0.0) {
1788     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1789     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1790        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1791     for (r = 0; r < len; ++r) {
1792       const PetscInt row = lrows[r] + A->rmap->rstart;
1793       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1794     }
1795     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1799   }
1800   ierr = PetscFree(lrows);CHKERRQ(ierr);
1801 
1802   /* only change matrix nonzero state if pattern was allowed to be changed */
1803   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1804     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1805     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1806   }
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1812 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1813 {
1814   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1815   PetscErrorCode    ierr;
1816   PetscMPIInt       n = A->rmap->n;
1817   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1818   PetscInt          *lrows,*owners = A->rmap->range;
1819   PetscSFNode       *rrows;
1820   PetscSF           sf;
1821   const PetscScalar *xx;
1822   PetscScalar       *bb,*mask;
1823   Vec               xmask,lmask;
1824   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1825   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1826   PetscScalar       *aa;
1827 
1828   PetscFunctionBegin;
1829   /* Create SF where leaves are input rows and roots are owned rows */
1830   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1831   for (r = 0; r < n; ++r) lrows[r] = -1;
1832   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1833   for (r = 0; r < N; ++r) {
1834     const PetscInt idx   = rows[r];
1835     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1836     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1837       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1838     }
1839     rrows[r].rank  = p;
1840     rrows[r].index = rows[r] - owners[p];
1841   }
1842   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1843   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1844   /* Collect flags for rows to be zeroed */
1845   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1846   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1847   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1848   /* Compress and put in row numbers */
1849   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1850   /* zero diagonal part of matrix */
1851   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1852   /* handle off diagonal part of matrix */
1853   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1854   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1855   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1856   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1857   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1858   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1859   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1860   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1861   if (x) {
1862     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1863     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1864     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1865     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1866   }
1867   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1868   /* remove zeroed rows of off diagonal matrix */
1869   for (i = 0; i < len; ++i) {
1870     row   = lrows[i];
1871     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1872     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1873     for (k = 0; k < count; ++k) {
1874       aa[0] = 0.0;
1875       aa   += bs;
1876     }
1877   }
1878   /* loop over all elements of off process part of matrix zeroing removed columns*/
1879   for (i = 0; i < l->B->rmap->N; ++i) {
1880     row = i/bs;
1881     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1882       for (k = 0; k < bs; ++k) {
1883         col = bs*baij->j[j] + k;
1884         if (PetscAbsScalar(mask[col])) {
1885           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1886           if (b) bb[i] -= aa[0]*xx[col];
1887           aa[0] = 0.0;
1888         }
1889       }
1890     }
1891   }
1892   if (x) {
1893     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1894     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1895   }
1896   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1897   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1898   ierr = PetscFree(lrows);CHKERRQ(ierr);
1899 
1900   /* only change matrix nonzero state if pattern was allowed to be changed */
1901   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1902     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1903     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1904   }
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1910 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1911 {
1912   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1913   PetscErrorCode ierr;
1914 
1915   PetscFunctionBegin;
1916   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatEqual_MPIBAIJ"
1924 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1925 {
1926   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1927   Mat            a,b,c,d;
1928   PetscBool      flg;
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   a = matA->A; b = matA->B;
1933   c = matB->A; d = matB->B;
1934 
1935   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1936   if (flg) {
1937     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1938   }
1939   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatCopy_MPIBAIJ"
1945 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1946 {
1947   PetscErrorCode ierr;
1948   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1949   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1950 
1951   PetscFunctionBegin;
1952   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1953   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1954     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1955   } else {
1956     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1957     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1964 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1965 {
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
1975 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1976 {
1977   PetscErrorCode ierr;
1978   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1979   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1980   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1981 
1982   PetscFunctionBegin;
1983   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1984   PetscFunctionReturn(0);
1985 }
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1989 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1990 {
1991   PetscErrorCode ierr;
1992   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1993   PetscBLASInt   bnz,one=1;
1994   Mat_SeqBAIJ    *x,*y;
1995 
1996   PetscFunctionBegin;
1997   if (str == SAME_NONZERO_PATTERN) {
1998     PetscScalar alpha = a;
1999     x    = (Mat_SeqBAIJ*)xx->A->data;
2000     y    = (Mat_SeqBAIJ*)yy->A->data;
2001     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2002     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2003     x    = (Mat_SeqBAIJ*)xx->B->data;
2004     y    = (Mat_SeqBAIJ*)yy->B->data;
2005     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2006     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2007     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2008   } else {
2009     Mat      B;
2010     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2011     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2012     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2013     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2014     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2015     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2016     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2017     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2018     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2019     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2020     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2021     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2022     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2023     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2024     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2025     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2026   }
2027   PetscFunctionReturn(0);
2028 }
2029 
2030 #undef __FUNCT__
2031 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2032 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2033 {
2034   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2035   PetscErrorCode ierr;
2036 
2037   PetscFunctionBegin;
2038   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2039   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 #undef __FUNCT__
2044 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2045 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2046 {
2047   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2048   PetscErrorCode ierr;
2049 
2050   PetscFunctionBegin;
2051   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2052   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2058 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2059 {
2060   PetscErrorCode ierr;
2061   IS             iscol_local;
2062   PetscInt       csize;
2063 
2064   PetscFunctionBegin;
2065   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2066   if (call == MAT_REUSE_MATRIX) {
2067     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2068     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2069   } else {
2070     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2071   }
2072   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2073   if (call == MAT_INITIAL_MATRIX) {
2074     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2075     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2076   }
2077   PetscFunctionReturn(0);
2078 }
2079 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2080 #undef __FUNCT__
2081 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2082 /*
2083   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2084   in local and then by concatenating the local matrices the end result.
2085   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2086 */
2087 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2088 {
2089   PetscErrorCode ierr;
2090   PetscMPIInt    rank,size;
2091   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2092   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2093   Mat            M,Mreuse;
2094   MatScalar      *vwork,*aa;
2095   MPI_Comm       comm;
2096   IS             isrow_new, iscol_new;
2097   PetscBool      idflag,allrows, allcols;
2098   Mat_SeqBAIJ    *aij;
2099 
2100   PetscFunctionBegin;
2101   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2102   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2103   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2104   /* The compression and expansion should be avoided. Doesn't point
2105      out errors, might change the indices, hence buggey */
2106   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2107   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2108 
2109   /* Check for special case: each processor gets entire matrix columns */
2110   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2111   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2112   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2113   else allcols = PETSC_FALSE;
2114 
2115   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2116   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2117   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2118   else allrows = PETSC_FALSE;
2119 
2120   if (call ==  MAT_REUSE_MATRIX) {
2121     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2122     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2123     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2124   } else {
2125     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2126   }
2127   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2128   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2129   /*
2130       m - number of local rows
2131       n - number of columns (same on all processors)
2132       rstart - first row in new global matrix generated
2133   */
2134   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2135   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2136   m    = m/bs;
2137   n    = n/bs;
2138 
2139   if (call == MAT_INITIAL_MATRIX) {
2140     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2141     ii  = aij->i;
2142     jj  = aij->j;
2143 
2144     /*
2145         Determine the number of non-zeros in the diagonal and off-diagonal
2146         portions of the matrix in order to do correct preallocation
2147     */
2148 
2149     /* first get start and end of "diagonal" columns */
2150     if (csize == PETSC_DECIDE) {
2151       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2152       if (mglobal == n*bs) { /* square matrix */
2153         nlocal = m;
2154       } else {
2155         nlocal = n/size + ((n % size) > rank);
2156       }
2157     } else {
2158       nlocal = csize/bs;
2159     }
2160     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2161     rstart = rend - nlocal;
2162     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2163 
2164     /* next, compute all the lengths */
2165     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2166     for (i=0; i<m; i++) {
2167       jend = ii[i+1] - ii[i];
2168       olen = 0;
2169       dlen = 0;
2170       for (j=0; j<jend; j++) {
2171         if (*jj < rstart || *jj >= rend) olen++;
2172         else dlen++;
2173         jj++;
2174       }
2175       olens[i] = olen;
2176       dlens[i] = dlen;
2177     }
2178     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2179     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2180     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2181     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2182     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2183   } else {
2184     PetscInt ml,nl;
2185 
2186     M    = *newmat;
2187     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2188     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2189     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2190     /*
2191          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2192        rather than the slower MatSetValues().
2193     */
2194     M->was_assembled = PETSC_TRUE;
2195     M->assembled     = PETSC_FALSE;
2196   }
2197   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2198   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2199   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2200   ii   = aij->i;
2201   jj   = aij->j;
2202   aa   = aij->a;
2203   for (i=0; i<m; i++) {
2204     row   = rstart/bs + i;
2205     nz    = ii[i+1] - ii[i];
2206     cwork = jj;     jj += nz;
2207     vwork = aa;     aa += nz*bs*bs;
2208     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2209   }
2210 
2211   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2212   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2213   *newmat = M;
2214 
2215   /* save submatrix used in processor for next request */
2216   if (call ==  MAT_INITIAL_MATRIX) {
2217     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2218     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2219   }
2220   PetscFunctionReturn(0);
2221 }
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatPermute_MPIBAIJ"
2225 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2226 {
2227   MPI_Comm       comm,pcomm;
2228   PetscInt       clocal_size,nrows;
2229   const PetscInt *rows;
2230   PetscMPIInt    size;
2231   IS             crowp,lcolp;
2232   PetscErrorCode ierr;
2233 
2234   PetscFunctionBegin;
2235   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2236   /* make a collective version of 'rowp' */
2237   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2238   if (pcomm==comm) {
2239     crowp = rowp;
2240   } else {
2241     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2242     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2243     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2244     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2245   }
2246   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2247   /* make a local version of 'colp' */
2248   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2249   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2250   if (size==1) {
2251     lcolp = colp;
2252   } else {
2253     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2254   }
2255   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2256   /* now we just get the submatrix */
2257   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2258   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2259   /* clean up */
2260   if (pcomm!=comm) {
2261     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2262   }
2263   if (size>1) {
2264     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2265   }
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2271 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2272 {
2273   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2274   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2275 
2276   PetscFunctionBegin;
2277   if (nghosts) *nghosts = B->nbs;
2278   if (ghosts) *ghosts = baij->garray;
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2284 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2285 {
2286   Mat            B;
2287   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2288   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2289   Mat_SeqAIJ     *b;
2290   PetscErrorCode ierr;
2291   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2292   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2293   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2294 
2295   PetscFunctionBegin;
2296   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2297   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2298 
2299   /* ----------------------------------------------------------------
2300      Tell every processor the number of nonzeros per row
2301   */
2302   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2303   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2304     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2305   }
2306   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2307   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2308   displs    = recvcounts + size;
2309   for (i=0; i<size; i++) {
2310     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2311     displs[i]     = A->rmap->range[i]/bs;
2312   }
2313 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2314   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2315 #else
2316   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2317 #endif
2318   /* ---------------------------------------------------------------
2319      Create the sequential matrix of the same type as the local block diagonal
2320   */
2321   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2322   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2323   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2324   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2325   b    = (Mat_SeqAIJ*)B->data;
2326 
2327   /*--------------------------------------------------------------------
2328     Copy my part of matrix column indices over
2329   */
2330   sendcount  = ad->nz + bd->nz;
2331   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2332   a_jsendbuf = ad->j;
2333   b_jsendbuf = bd->j;
2334   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2335   cnt        = 0;
2336   for (i=0; i<n; i++) {
2337 
2338     /* put in lower diagonal portion */
2339     m = bd->i[i+1] - bd->i[i];
2340     while (m > 0) {
2341       /* is it above diagonal (in bd (compressed) numbering) */
2342       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2343       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2344       m--;
2345     }
2346 
2347     /* put in diagonal portion */
2348     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2349       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2350     }
2351 
2352     /* put in upper diagonal portion */
2353     while (m-- > 0) {
2354       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2355     }
2356   }
2357   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2358 
2359   /*--------------------------------------------------------------------
2360     Gather all column indices to all processors
2361   */
2362   for (i=0; i<size; i++) {
2363     recvcounts[i] = 0;
2364     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2365       recvcounts[i] += lens[j];
2366     }
2367   }
2368   displs[0] = 0;
2369   for (i=1; i<size; i++) {
2370     displs[i] = displs[i-1] + recvcounts[i-1];
2371   }
2372 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2373   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2374 #else
2375   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2376 #endif
2377   /*--------------------------------------------------------------------
2378     Assemble the matrix into useable form (note numerical values not yet set)
2379   */
2380   /* set the b->ilen (length of each row) values */
2381   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2382   /* set the b->i indices */
2383   b->i[0] = 0;
2384   for (i=1; i<=A->rmap->N/bs; i++) {
2385     b->i[i] = b->i[i-1] + lens[i-1];
2386   }
2387   ierr = PetscFree(lens);CHKERRQ(ierr);
2388   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2391 
2392   if (A->symmetric) {
2393     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2394   } else if (A->hermitian) {
2395     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2396   } else if (A->structurally_symmetric) {
2397     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2398   }
2399   *newmat = B;
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatSOR_MPIBAIJ"
2405 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2406 {
2407   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2408   PetscErrorCode ierr;
2409   Vec            bb1 = 0;
2410 
2411   PetscFunctionBegin;
2412   if (flag == SOR_APPLY_UPPER) {
2413     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2414     PetscFunctionReturn(0);
2415   }
2416 
2417   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2418     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2419   }
2420 
2421   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2422     if (flag & SOR_ZERO_INITIAL_GUESS) {
2423       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2424       its--;
2425     }
2426 
2427     while (its--) {
2428       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2429       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2430 
2431       /* update rhs: bb1 = bb - B*x */
2432       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2433       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2434 
2435       /* local sweep */
2436       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2437     }
2438   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2439     if (flag & SOR_ZERO_INITIAL_GUESS) {
2440       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2441       its--;
2442     }
2443     while (its--) {
2444       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2445       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2446 
2447       /* update rhs: bb1 = bb - B*x */
2448       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2449       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2450 
2451       /* local sweep */
2452       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2453     }
2454   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2455     if (flag & SOR_ZERO_INITIAL_GUESS) {
2456       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2457       its--;
2458     }
2459     while (its--) {
2460       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2461       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2462 
2463       /* update rhs: bb1 = bb - B*x */
2464       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2465       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2466 
2467       /* local sweep */
2468       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2469     }
2470   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2471 
2472   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 #undef __FUNCT__
2477 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2478 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2479 {
2480   PetscErrorCode ierr;
2481   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2482   PetscInt       N,i,*garray = aij->garray;
2483   PetscInt       ib,jb,bs = A->rmap->bs;
2484   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2485   MatScalar      *a_val = a_aij->a;
2486   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2487   MatScalar      *b_val = b_aij->a;
2488   PetscReal      *work;
2489 
2490   PetscFunctionBegin;
2491   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2492   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2493   if (type == NORM_2) {
2494     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2495       for (jb=0; jb<bs; jb++) {
2496         for (ib=0; ib<bs; ib++) {
2497           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2498           a_val++;
2499         }
2500       }
2501     }
2502     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2503       for (jb=0; jb<bs; jb++) {
2504         for (ib=0; ib<bs; ib++) {
2505           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2506           b_val++;
2507         }
2508       }
2509     }
2510   } else if (type == NORM_1) {
2511     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2512       for (jb=0; jb<bs; jb++) {
2513         for (ib=0; ib<bs; ib++) {
2514           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2515           a_val++;
2516         }
2517       }
2518     }
2519     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2520       for (jb=0; jb<bs; jb++) {
2521        for (ib=0; ib<bs; ib++) {
2522           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2523           b_val++;
2524         }
2525       }
2526     }
2527   } else if (type == NORM_INFINITY) {
2528     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2529       for (jb=0; jb<bs; jb++) {
2530         for (ib=0; ib<bs; ib++) {
2531           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2532           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2533           a_val++;
2534         }
2535       }
2536     }
2537     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2538       for (jb=0; jb<bs; jb++) {
2539         for (ib=0; ib<bs; ib++) {
2540           int col = garray[b_aij->j[i]] * bs + jb;
2541           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2542           b_val++;
2543         }
2544       }
2545     }
2546   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2547   if (type == NORM_INFINITY) {
2548     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2549   } else {
2550     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2551   }
2552   ierr = PetscFree(work);CHKERRQ(ierr);
2553   if (type == NORM_2) {
2554     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2555   }
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2561 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2562 {
2563   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 
2572 /* -------------------------------------------------------------------*/
2573 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2574                                        MatGetRow_MPIBAIJ,
2575                                        MatRestoreRow_MPIBAIJ,
2576                                        MatMult_MPIBAIJ,
2577                                 /* 4*/ MatMultAdd_MPIBAIJ,
2578                                        MatMultTranspose_MPIBAIJ,
2579                                        MatMultTransposeAdd_MPIBAIJ,
2580                                        0,
2581                                        0,
2582                                        0,
2583                                 /*10*/ 0,
2584                                        0,
2585                                        0,
2586                                        MatSOR_MPIBAIJ,
2587                                        MatTranspose_MPIBAIJ,
2588                                 /*15*/ MatGetInfo_MPIBAIJ,
2589                                        MatEqual_MPIBAIJ,
2590                                        MatGetDiagonal_MPIBAIJ,
2591                                        MatDiagonalScale_MPIBAIJ,
2592                                        MatNorm_MPIBAIJ,
2593                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2594                                        MatAssemblyEnd_MPIBAIJ,
2595                                        MatSetOption_MPIBAIJ,
2596                                        MatZeroEntries_MPIBAIJ,
2597                                 /*24*/ MatZeroRows_MPIBAIJ,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                        0,
2602                                 /*29*/ MatSetUp_MPIBAIJ,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                 /*34*/ MatDuplicate_MPIBAIJ,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                 /*39*/ MatAXPY_MPIBAIJ,
2613                                        MatGetSubMatrices_MPIBAIJ,
2614                                        MatIncreaseOverlap_MPIBAIJ,
2615                                        MatGetValues_MPIBAIJ,
2616                                        MatCopy_MPIBAIJ,
2617                                 /*44*/ 0,
2618                                        MatScale_MPIBAIJ,
2619                                        0,
2620                                        0,
2621                                        MatZeroRowsColumns_MPIBAIJ,
2622                                 /*49*/ 0,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2628                                        0,
2629                                        MatSetUnfactored_MPIBAIJ,
2630                                        MatPermute_MPIBAIJ,
2631                                        MatSetValuesBlocked_MPIBAIJ,
2632                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2633                                        MatDestroy_MPIBAIJ,
2634                                        MatView_MPIBAIJ,
2635                                        0,
2636                                        0,
2637                                 /*64*/ 0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                 /*74*/ 0,
2648                                        MatFDColoringApply_BAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                 /*79*/ 0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                        MatLoad_MPIBAIJ,
2657                                 /*84*/ 0,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                 /*89*/ 0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                 /*94*/ 0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                 /*99*/ 0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                 /*104*/0,
2678                                        MatRealPart_MPIBAIJ,
2679                                        MatImaginaryPart_MPIBAIJ,
2680                                        0,
2681                                        0,
2682                                 /*109*/0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2688                                        0,
2689                                        MatGetGhosts_MPIBAIJ,
2690                                        0,
2691                                        0,
2692                                 /*119*/0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        MatGetMultiProcBlock_MPIBAIJ,
2697                                 /*124*/0,
2698                                        MatGetColumnNorms_MPIBAIJ,
2699                                        MatInvertBlockDiagonal_MPIBAIJ,
2700                                        0,
2701                                        0,
2702                                /*129*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /*134*/ 0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                /*139*/ 0,
2713                                        0,
2714                                        0,
2715                                        MatFDColoringSetUp_MPIXAIJ,
2716                                        0,
2717                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2718 };
2719 
2720 #undef __FUNCT__
2721 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2722 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2723 {
2724   PetscFunctionBegin;
2725   *a = ((Mat_MPIBAIJ*)A->data)->A;
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2733 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2734 {
2735   PetscInt       m,rstart,cstart,cend;
2736   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2737   const PetscInt *JJ    =0;
2738   PetscScalar    *values=0;
2739   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2744   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2745   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2746   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2747   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2748   m      = B->rmap->n/bs;
2749   rstart = B->rmap->rstart/bs;
2750   cstart = B->cmap->rstart/bs;
2751   cend   = B->cmap->rend/bs;
2752 
2753   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2754   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2755   for (i=0; i<m; i++) {
2756     nz = ii[i+1] - ii[i];
2757     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2758     nz_max = PetscMax(nz_max,nz);
2759     JJ     = jj + ii[i];
2760     for (j=0; j<nz; j++) {
2761       if (*JJ >= cstart) break;
2762       JJ++;
2763     }
2764     d = 0;
2765     for (; j<nz; j++) {
2766       if (*JJ++ >= cend) break;
2767       d++;
2768     }
2769     d_nnz[i] = d;
2770     o_nnz[i] = nz - d;
2771   }
2772   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2773   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2774 
2775   values = (PetscScalar*)V;
2776   if (!values) {
2777     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2778     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2779   }
2780   for (i=0; i<m; i++) {
2781     PetscInt          row    = i + rstart;
2782     PetscInt          ncols  = ii[i+1] - ii[i];
2783     const PetscInt    *icols = jj + ii[i];
2784     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2785       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2786       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2787     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2788       PetscInt j;
2789       for (j=0; j<ncols; j++) {
2790         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2791         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2792       }
2793     }
2794   }
2795 
2796   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2797   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2798   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2805 /*@C
2806    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2807    (the default parallel PETSc format).
2808 
2809    Collective on MPI_Comm
2810 
2811    Input Parameters:
2812 +  B - the matrix
2813 .  bs - the block size
2814 .  i - the indices into j for the start of each local row (starts with zero)
2815 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2816 -  v - optional values in the matrix
2817 
2818    Level: developer
2819 
2820    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2821    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2822    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2823    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2824    block column and the second index is over columns within a block.
2825 
2826 .keywords: matrix, aij, compressed row, sparse, parallel
2827 
2828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2829 @*/
2830 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2831 {
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2836   PetscValidType(B,1);
2837   PetscValidLogicalCollectiveInt(B,bs,2);
2838   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 #undef __FUNCT__
2843 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2844 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2845 {
2846   Mat_MPIBAIJ    *b;
2847   PetscErrorCode ierr;
2848   PetscInt       i;
2849 
2850   PetscFunctionBegin;
2851   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2852   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2853   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2854   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2855 
2856   if (d_nnz) {
2857     for (i=0; i<B->rmap->n/bs; i++) {
2858       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2859     }
2860   }
2861   if (o_nnz) {
2862     for (i=0; i<B->rmap->n/bs; i++) {
2863       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2864     }
2865   }
2866 
2867   b      = (Mat_MPIBAIJ*)B->data;
2868   b->bs2 = bs*bs;
2869   b->mbs = B->rmap->n/bs;
2870   b->nbs = B->cmap->n/bs;
2871   b->Mbs = B->rmap->N/bs;
2872   b->Nbs = B->cmap->N/bs;
2873 
2874   for (i=0; i<=b->size; i++) {
2875     b->rangebs[i] = B->rmap->range[i]/bs;
2876   }
2877   b->rstartbs = B->rmap->rstart/bs;
2878   b->rendbs   = B->rmap->rend/bs;
2879   b->cstartbs = B->cmap->rstart/bs;
2880   b->cendbs   = B->cmap->rend/bs;
2881 
2882   if (!B->preallocated) {
2883     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2884     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2885     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2886     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2887     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2888     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2889     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2890     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2891     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2892   }
2893 
2894   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2895   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2896   B->preallocated = PETSC_TRUE;
2897   PetscFunctionReturn(0);
2898 }
2899 
2900 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2901 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2902 
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2905 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2906 {
2907   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2908   PetscErrorCode ierr;
2909   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2910   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2911   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2912 
2913   PetscFunctionBegin;
2914   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2915   ii[0] = 0;
2916   for (i=0; i<M; i++) {
2917     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2918     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2919     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2920     /* remove one from count of matrix has diagonal */
2921     for (j=id[i]; j<id[i+1]; j++) {
2922       if (jd[j] == i) {ii[i+1]--;break;}
2923     }
2924   }
2925   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2926   cnt  = 0;
2927   for (i=0; i<M; i++) {
2928     for (j=io[i]; j<io[i+1]; j++) {
2929       if (garray[jo[j]] > rstart) break;
2930       jj[cnt++] = garray[jo[j]];
2931     }
2932     for (k=id[i]; k<id[i+1]; k++) {
2933       if (jd[k] != i) {
2934         jj[cnt++] = rstart + jd[k];
2935       }
2936     }
2937     for (; j<io[i+1]; j++) {
2938       jj[cnt++] = garray[jo[j]];
2939     }
2940   }
2941   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2946 
2947 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2948 
2949 #undef __FUNCT__
2950 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2951 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2952 {
2953   PetscErrorCode ierr;
2954   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2955   Mat            B;
2956   Mat_MPIAIJ     *b;
2957 
2958   PetscFunctionBegin;
2959   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2960 
2961   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2962   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2963   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2964   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2965   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2966   b    = (Mat_MPIAIJ*) B->data;
2967 
2968   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2969   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2970   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2971   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2972   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   if (reuse == MAT_REUSE_MATRIX) {
2978     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2979   } else {
2980    *newmat = B;
2981   }
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 #if defined(PETSC_HAVE_MUMPS)
2986 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2987 #endif
2988 
2989 /*MC
2990    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2991 
2992    Options Database Keys:
2993 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2994 . -mat_block_size <bs> - set the blocksize used to store the matrix
2995 - -mat_use_hash_table <fact>
2996 
2997   Level: beginner
2998 
2999 .seealso: MatCreateMPIBAIJ
3000 M*/
3001 
3002 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3003 
3004 #undef __FUNCT__
3005 #define __FUNCT__ "MatCreate_MPIBAIJ"
3006 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3007 {
3008   Mat_MPIBAIJ    *b;
3009   PetscErrorCode ierr;
3010   PetscBool      flg;
3011 
3012   PetscFunctionBegin;
3013   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3014   B->data = (void*)b;
3015 
3016   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3017   B->assembled = PETSC_FALSE;
3018 
3019   B->insertmode = NOT_SET_VALUES;
3020   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3021   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3022 
3023   /* build local table of row and column ownerships */
3024   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3025 
3026   /* build cache for off array entries formed */
3027   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3028 
3029   b->donotstash  = PETSC_FALSE;
3030   b->colmap      = NULL;
3031   b->garray      = NULL;
3032   b->roworiented = PETSC_TRUE;
3033 
3034   /* stuff used in block assembly */
3035   b->barray = 0;
3036 
3037   /* stuff used for matrix vector multiply */
3038   b->lvec  = 0;
3039   b->Mvctx = 0;
3040 
3041   /* stuff for MatGetRow() */
3042   b->rowindices   = 0;
3043   b->rowvalues    = 0;
3044   b->getrowactive = PETSC_FALSE;
3045 
3046   /* hash table stuff */
3047   b->ht           = 0;
3048   b->hd           = 0;
3049   b->ht_size      = 0;
3050   b->ht_flag      = PETSC_FALSE;
3051   b->ht_fact      = 0;
3052   b->ht_total_ct  = 0;
3053   b->ht_insert_ct = 0;
3054 
3055   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3056   b->ijonly = PETSC_FALSE;
3057 
3058   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3059   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3060   if (flg) {
3061     PetscReal fact = 1.39;
3062     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3063     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3064     if (fact <= 1.0) fact = 1.39;
3065     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3066     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3067   }
3068   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3069 
3070 #if defined(PETSC_HAVE_MUMPS)
3071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3072 #endif
3073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3076   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3084   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3085   PetscFunctionReturn(0);
3086 }
3087 
3088 /*MC
3089    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3090 
3091    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3092    and MATMPIBAIJ otherwise.
3093 
3094    Options Database Keys:
3095 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3096 
3097   Level: beginner
3098 
3099 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3100 M*/
3101 
3102 #undef __FUNCT__
3103 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3104 /*@C
3105    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3106    (block compressed row).  For good matrix assembly performance
3107    the user should preallocate the matrix storage by setting the parameters
3108    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3109    performance can be increased by more than a factor of 50.
3110 
3111    Collective on Mat
3112 
3113    Input Parameters:
3114 +  B - the matrix
3115 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3116           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3117 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3118            submatrix  (same for all local rows)
3119 .  d_nnz - array containing the number of block nonzeros in the various block rows
3120            of the in diagonal portion of the local (possibly different for each block
3121            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3122            set it even if it is zero.
3123 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3124            submatrix (same for all local rows).
3125 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3126            off-diagonal portion of the local submatrix (possibly different for
3127            each block row) or NULL.
3128 
3129    If the *_nnz parameter is given then the *_nz parameter is ignored
3130 
3131    Options Database Keys:
3132 +   -mat_block_size - size of the blocks to use
3133 -   -mat_use_hash_table <fact>
3134 
3135    Notes:
3136    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3137    than it must be used on all processors that share the object for that argument.
3138 
3139    Storage Information:
3140    For a square global matrix we define each processor's diagonal portion
3141    to be its local rows and the corresponding columns (a square submatrix);
3142    each processor's off-diagonal portion encompasses the remainder of the
3143    local matrix (a rectangular submatrix).
3144 
3145    The user can specify preallocated storage for the diagonal part of
3146    the local submatrix with either d_nz or d_nnz (not both).  Set
3147    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3148    memory allocation.  Likewise, specify preallocated storage for the
3149    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3150 
3151    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3152    the figure below we depict these three local rows and all columns (0-11).
3153 
3154 .vb
3155            0 1 2 3 4 5 6 7 8 9 10 11
3156           --------------------------
3157    row 3  |o o o d d d o o o o  o  o
3158    row 4  |o o o d d d o o o o  o  o
3159    row 5  |o o o d d d o o o o  o  o
3160           --------------------------
3161 .ve
3162 
3163    Thus, any entries in the d locations are stored in the d (diagonal)
3164    submatrix, and any entries in the o locations are stored in the
3165    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3166    stored simply in the MATSEQBAIJ format for compressed row storage.
3167 
3168    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3169    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3170    In general, for PDE problems in which most nonzeros are near the diagonal,
3171    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3172    or you will get TERRIBLE performance; see the users' manual chapter on
3173    matrices.
3174 
3175    You can call MatGetInfo() to get information on how effective the preallocation was;
3176    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3177    You can also run with the option -info and look for messages with the string
3178    malloc in them to see if additional memory allocation was needed.
3179 
3180    Level: intermediate
3181 
3182 .keywords: matrix, block, aij, compressed row, sparse, parallel
3183 
3184 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3185 @*/
3186 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3187 {
3188   PetscErrorCode ierr;
3189 
3190   PetscFunctionBegin;
3191   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3192   PetscValidType(B,1);
3193   PetscValidLogicalCollectiveInt(B,bs,2);
3194   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3195   PetscFunctionReturn(0);
3196 }
3197 
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatCreateBAIJ"
3200 /*@C
3201    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3202    (block compressed row).  For good matrix assembly performance
3203    the user should preallocate the matrix storage by setting the parameters
3204    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3205    performance can be increased by more than a factor of 50.
3206 
3207    Collective on MPI_Comm
3208 
3209    Input Parameters:
3210 +  comm - MPI communicator
3211 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3212           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3213 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3214            This value should be the same as the local size used in creating the
3215            y vector for the matrix-vector product y = Ax.
3216 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3217            This value should be the same as the local size used in creating the
3218            x vector for the matrix-vector product y = Ax.
3219 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3220 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3221 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3222            submatrix  (same for all local rows)
3223 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3224            of the in diagonal portion of the local (possibly different for each block
3225            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3226            and set it even if it is zero.
3227 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3228            submatrix (same for all local rows).
3229 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3230            off-diagonal portion of the local submatrix (possibly different for
3231            each block row) or NULL.
3232 
3233    Output Parameter:
3234 .  A - the matrix
3235 
3236    Options Database Keys:
3237 +   -mat_block_size - size of the blocks to use
3238 -   -mat_use_hash_table <fact>
3239 
3240    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3241    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3242    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3243 
3244    Notes:
3245    If the *_nnz parameter is given then the *_nz parameter is ignored
3246 
3247    A nonzero block is any block that as 1 or more nonzeros in it
3248 
3249    The user MUST specify either the local or global matrix dimensions
3250    (possibly both).
3251 
3252    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3253    than it must be used on all processors that share the object for that argument.
3254 
3255    Storage Information:
3256    For a square global matrix we define each processor's diagonal portion
3257    to be its local rows and the corresponding columns (a square submatrix);
3258    each processor's off-diagonal portion encompasses the remainder of the
3259    local matrix (a rectangular submatrix).
3260 
3261    The user can specify preallocated storage for the diagonal part of
3262    the local submatrix with either d_nz or d_nnz (not both).  Set
3263    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3264    memory allocation.  Likewise, specify preallocated storage for the
3265    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3266 
3267    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3268    the figure below we depict these three local rows and all columns (0-11).
3269 
3270 .vb
3271            0 1 2 3 4 5 6 7 8 9 10 11
3272           --------------------------
3273    row 3  |o o o d d d o o o o  o  o
3274    row 4  |o o o d d d o o o o  o  o
3275    row 5  |o o o d d d o o o o  o  o
3276           --------------------------
3277 .ve
3278 
3279    Thus, any entries in the d locations are stored in the d (diagonal)
3280    submatrix, and any entries in the o locations are stored in the
3281    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3282    stored simply in the MATSEQBAIJ format for compressed row storage.
3283 
3284    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3285    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3286    In general, for PDE problems in which most nonzeros are near the diagonal,
3287    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3288    or you will get TERRIBLE performance; see the users' manual chapter on
3289    matrices.
3290 
3291    Level: intermediate
3292 
3293 .keywords: matrix, block, aij, compressed row, sparse, parallel
3294 
3295 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3296 @*/
3297 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3298 {
3299   PetscErrorCode ierr;
3300   PetscMPIInt    size;
3301 
3302   PetscFunctionBegin;
3303   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3304   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3305   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3306   if (size > 1) {
3307     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3308     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3309   } else {
3310     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3311     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3312   }
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 #undef __FUNCT__
3317 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3318 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3319 {
3320   Mat            mat;
3321   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3322   PetscErrorCode ierr;
3323   PetscInt       len=0;
3324 
3325   PetscFunctionBegin;
3326   *newmat = 0;
3327   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3328   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3329   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3330   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3331 
3332   mat->factortype   = matin->factortype;
3333   mat->preallocated = PETSC_TRUE;
3334   mat->assembled    = PETSC_TRUE;
3335   mat->insertmode   = NOT_SET_VALUES;
3336 
3337   a             = (Mat_MPIBAIJ*)mat->data;
3338   mat->rmap->bs = matin->rmap->bs;
3339   a->bs2        = oldmat->bs2;
3340   a->mbs        = oldmat->mbs;
3341   a->nbs        = oldmat->nbs;
3342   a->Mbs        = oldmat->Mbs;
3343   a->Nbs        = oldmat->Nbs;
3344 
3345   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3346   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3347 
3348   a->size         = oldmat->size;
3349   a->rank         = oldmat->rank;
3350   a->donotstash   = oldmat->donotstash;
3351   a->roworiented  = oldmat->roworiented;
3352   a->rowindices   = 0;
3353   a->rowvalues    = 0;
3354   a->getrowactive = PETSC_FALSE;
3355   a->barray       = 0;
3356   a->rstartbs     = oldmat->rstartbs;
3357   a->rendbs       = oldmat->rendbs;
3358   a->cstartbs     = oldmat->cstartbs;
3359   a->cendbs       = oldmat->cendbs;
3360 
3361   /* hash table stuff */
3362   a->ht           = 0;
3363   a->hd           = 0;
3364   a->ht_size      = 0;
3365   a->ht_flag      = oldmat->ht_flag;
3366   a->ht_fact      = oldmat->ht_fact;
3367   a->ht_total_ct  = 0;
3368   a->ht_insert_ct = 0;
3369 
3370   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3371   if (oldmat->colmap) {
3372 #if defined(PETSC_USE_CTABLE)
3373     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3374 #else
3375     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3376     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3377     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3378 #endif
3379   } else a->colmap = 0;
3380 
3381   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3382     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3383     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3384     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3385   } else a->garray = 0;
3386 
3387   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3388   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3389   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3390   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3391   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3392 
3393   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3394   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3395   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3396   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3397   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3398   *newmat = mat;
3399   PetscFunctionReturn(0);
3400 }
3401 
3402 #undef __FUNCT__
3403 #define __FUNCT__ "MatLoad_MPIBAIJ"
3404 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3405 {
3406   PetscErrorCode ierr;
3407   int            fd;
3408   PetscInt       i,nz,j,rstart,rend;
3409   PetscScalar    *vals,*buf;
3410   MPI_Comm       comm;
3411   MPI_Status     status;
3412   PetscMPIInt    rank,size,maxnz;
3413   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3414   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3415   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3416   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3417   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3418   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3419 
3420   PetscFunctionBegin;
3421   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3422   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3423   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3424   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3425 
3426   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3427   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3428   if (!rank) {
3429     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3430     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3431     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3432   }
3433 
3434   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3435 
3436   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3437   M    = header[1]; N = header[2];
3438 
3439   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3440   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3441   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3442 
3443   /* If global sizes are set, check if they are consistent with that given in the file */
3444   if (sizesset) {
3445     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3446   }
3447   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3448   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3449 
3450   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3451 
3452   /*
3453      This code adds extra rows to make sure the number of rows is
3454      divisible by the blocksize
3455   */
3456   Mbs        = M/bs;
3457   extra_rows = bs - M + bs*Mbs;
3458   if (extra_rows == bs) extra_rows = 0;
3459   else                  Mbs++;
3460   if (extra_rows && !rank) {
3461     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3462   }
3463 
3464   /* determine ownership of all rows */
3465   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3466     mbs = Mbs/size + ((Mbs % size) > rank);
3467     m   = mbs*bs;
3468   } else { /* User set */
3469     m   = newmat->rmap->n;
3470     mbs = m/bs;
3471   }
3472   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3473   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3474 
3475   /* process 0 needs enough room for process with most rows */
3476   if (!rank) {
3477     mmax = rowners[1];
3478     for (i=2; i<=size; i++) {
3479       mmax = PetscMax(mmax,rowners[i]);
3480     }
3481     mmax*=bs;
3482   } else mmax = -1;             /* unused, but compiler warns anyway */
3483 
3484   rowners[0] = 0;
3485   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3486   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3487   rstart = rowners[rank];
3488   rend   = rowners[rank+1];
3489 
3490   /* distribute row lengths to all processors */
3491   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3492   if (!rank) {
3493     mend = m;
3494     if (size == 1) mend = mend - extra_rows;
3495     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3496     for (j=mend; j<m; j++) locrowlens[j] = 1;
3497     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3498     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3499     for (j=0; j<m; j++) {
3500       procsnz[0] += locrowlens[j];
3501     }
3502     for (i=1; i<size; i++) {
3503       mend = browners[i+1] - browners[i];
3504       if (i == size-1) mend = mend - extra_rows;
3505       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3506       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3507       /* calculate the number of nonzeros on each processor */
3508       for (j=0; j<browners[i+1]-browners[i]; j++) {
3509         procsnz[i] += rowlengths[j];
3510       }
3511       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3512     }
3513     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3514   } else {
3515     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3516   }
3517 
3518   if (!rank) {
3519     /* determine max buffer needed and allocate it */
3520     maxnz = procsnz[0];
3521     for (i=1; i<size; i++) {
3522       maxnz = PetscMax(maxnz,procsnz[i]);
3523     }
3524     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3525 
3526     /* read in my part of the matrix column indices  */
3527     nz     = procsnz[0];
3528     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3529     mycols = ibuf;
3530     if (size == 1) nz -= extra_rows;
3531     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3532     if (size == 1) {
3533       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3534     }
3535 
3536     /* read in every ones (except the last) and ship off */
3537     for (i=1; i<size-1; i++) {
3538       nz   = procsnz[i];
3539       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3540       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3541     }
3542     /* read in the stuff for the last proc */
3543     if (size != 1) {
3544       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3545       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3546       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3547       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3548     }
3549     ierr = PetscFree(cols);CHKERRQ(ierr);
3550   } else {
3551     /* determine buffer space needed for message */
3552     nz = 0;
3553     for (i=0; i<m; i++) {
3554       nz += locrowlens[i];
3555     }
3556     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3557     mycols = ibuf;
3558     /* receive message of column indices*/
3559     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3560     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3561     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3562   }
3563 
3564   /* loop over local rows, determining number of off diagonal entries */
3565   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3566   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3567   rowcount = 0; nzcount = 0;
3568   for (i=0; i<mbs; i++) {
3569     dcount  = 0;
3570     odcount = 0;
3571     for (j=0; j<bs; j++) {
3572       kmax = locrowlens[rowcount];
3573       for (k=0; k<kmax; k++) {
3574         tmp = mycols[nzcount++]/bs;
3575         if (!mask[tmp]) {
3576           mask[tmp] = 1;
3577           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3578           else masked1[dcount++] = tmp;
3579         }
3580       }
3581       rowcount++;
3582     }
3583 
3584     dlens[i]  = dcount;
3585     odlens[i] = odcount;
3586 
3587     /* zero out the mask elements we set */
3588     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3589     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3590   }
3591 
3592 
3593   if (!sizesset) {
3594     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3595   }
3596   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3597 
3598   if (!rank) {
3599     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3600     /* read in my part of the matrix numerical values  */
3601     nz     = procsnz[0];
3602     vals   = buf;
3603     mycols = ibuf;
3604     if (size == 1) nz -= extra_rows;
3605     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3606     if (size == 1) {
3607       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3608     }
3609 
3610     /* insert into matrix */
3611     jj = rstart*bs;
3612     for (i=0; i<m; i++) {
3613       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3614       mycols += locrowlens[i];
3615       vals   += locrowlens[i];
3616       jj++;
3617     }
3618     /* read in other processors (except the last one) and ship out */
3619     for (i=1; i<size-1; i++) {
3620       nz   = procsnz[i];
3621       vals = buf;
3622       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3623       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3624     }
3625     /* the last proc */
3626     if (size != 1) {
3627       nz   = procsnz[i] - extra_rows;
3628       vals = buf;
3629       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3630       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3631       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3632     }
3633     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3634   } else {
3635     /* receive numeric values */
3636     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3637 
3638     /* receive message of values*/
3639     vals   = buf;
3640     mycols = ibuf;
3641     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3642 
3643     /* insert into matrix */
3644     jj = rstart*bs;
3645     for (i=0; i<m; i++) {
3646       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3647       mycols += locrowlens[i];
3648       vals   += locrowlens[i];
3649       jj++;
3650     }
3651   }
3652   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3653   ierr = PetscFree(buf);CHKERRQ(ierr);
3654   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3655   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3656   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3657   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3658   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3659   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3660   PetscFunctionReturn(0);
3661 }
3662 
3663 #undef __FUNCT__
3664 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3665 /*@
3666    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3667 
3668    Input Parameters:
3669 .  mat  - the matrix
3670 .  fact - factor
3671 
3672    Not Collective, each process can use a different factor
3673 
3674    Level: advanced
3675 
3676   Notes:
3677    This can also be set by the command line option: -mat_use_hash_table <fact>
3678 
3679 .keywords: matrix, hashtable, factor, HT
3680 
3681 .seealso: MatSetOption()
3682 @*/
3683 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3684 {
3685   PetscErrorCode ierr;
3686 
3687   PetscFunctionBegin;
3688   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3689   PetscFunctionReturn(0);
3690 }
3691 
3692 #undef __FUNCT__
3693 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3694 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3695 {
3696   Mat_MPIBAIJ *baij;
3697 
3698   PetscFunctionBegin;
3699   baij          = (Mat_MPIBAIJ*)mat->data;
3700   baij->ht_fact = fact;
3701   PetscFunctionReturn(0);
3702 }
3703 
3704 #undef __FUNCT__
3705 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3706 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3707 {
3708   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3709 
3710   PetscFunctionBegin;
3711   if (Ad)     *Ad     = a->A;
3712   if (Ao)     *Ao     = a->B;
3713   if (colmap) *colmap = a->garray;
3714   PetscFunctionReturn(0);
3715 }
3716 
3717 /*
3718     Special version for direct calls from Fortran (to eliminate two function call overheads
3719 */
3720 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3721 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3722 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3723 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3724 #endif
3725 
3726 #undef __FUNCT__
3727 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3728 /*@C
3729   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3730 
3731   Collective on Mat
3732 
3733   Input Parameters:
3734 + mat - the matrix
3735 . min - number of input rows
3736 . im - input rows
3737 . nin - number of input columns
3738 . in - input columns
3739 . v - numerical values input
3740 - addvin - INSERT_VALUES or ADD_VALUES
3741 
3742   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3743 
3744   Level: advanced
3745 
3746 .seealso:   MatSetValuesBlocked()
3747 @*/
3748 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3749 {
3750   /* convert input arguments to C version */
3751   Mat        mat  = *matin;
3752   PetscInt   m    = *min, n = *nin;
3753   InsertMode addv = *addvin;
3754 
3755   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3756   const MatScalar *value;
3757   MatScalar       *barray     = baij->barray;
3758   PetscBool       roworiented = baij->roworiented;
3759   PetscErrorCode  ierr;
3760   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3761   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3762   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3763 
3764   PetscFunctionBegin;
3765   /* tasks normally handled by MatSetValuesBlocked() */
3766   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3767 #if defined(PETSC_USE_DEBUG)
3768   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3769   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3770 #endif
3771   if (mat->assembled) {
3772     mat->was_assembled = PETSC_TRUE;
3773     mat->assembled     = PETSC_FALSE;
3774   }
3775   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3776 
3777 
3778   if (!barray) {
3779     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3780     baij->barray = barray;
3781   }
3782 
3783   if (roworiented) stepval = (n-1)*bs;
3784   else stepval = (m-1)*bs;
3785 
3786   for (i=0; i<m; i++) {
3787     if (im[i] < 0) continue;
3788 #if defined(PETSC_USE_DEBUG)
3789     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3790 #endif
3791     if (im[i] >= rstart && im[i] < rend) {
3792       row = im[i] - rstart;
3793       for (j=0; j<n; j++) {
3794         /* If NumCol = 1 then a copy is not required */
3795         if ((roworiented) && (n == 1)) {
3796           barray = (MatScalar*)v + i*bs2;
3797         } else if ((!roworiented) && (m == 1)) {
3798           barray = (MatScalar*)v + j*bs2;
3799         } else { /* Here a copy is required */
3800           if (roworiented) {
3801             value = v + i*(stepval+bs)*bs + j*bs;
3802           } else {
3803             value = v + j*(stepval+bs)*bs + i*bs;
3804           }
3805           for (ii=0; ii<bs; ii++,value+=stepval) {
3806             for (jj=0; jj<bs; jj++) {
3807               *barray++ = *value++;
3808             }
3809           }
3810           barray -=bs2;
3811         }
3812 
3813         if (in[j] >= cstart && in[j] < cend) {
3814           col  = in[j] - cstart;
3815           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3816         } else if (in[j] < 0) continue;
3817 #if defined(PETSC_USE_DEBUG)
3818         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3819 #endif
3820         else {
3821           if (mat->was_assembled) {
3822             if (!baij->colmap) {
3823               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3824             }
3825 
3826 #if defined(PETSC_USE_DEBUG)
3827 #if defined(PETSC_USE_CTABLE)
3828             { PetscInt data;
3829               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3830               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3831             }
3832 #else
3833             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3834 #endif
3835 #endif
3836 #if defined(PETSC_USE_CTABLE)
3837             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3838             col  = (col - 1)/bs;
3839 #else
3840             col = (baij->colmap[in[j]] - 1)/bs;
3841 #endif
3842             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3843               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3844               col  =  in[j];
3845             }
3846           } else col = in[j];
3847           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3848         }
3849       }
3850     } else {
3851       if (!baij->donotstash) {
3852         if (roworiented) {
3853           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3854         } else {
3855           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3856         }
3857       }
3858     }
3859   }
3860 
3861   /* task normally handled by MatSetValuesBlocked() */
3862   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3863   PetscFunctionReturn(0);
3864 }
3865 
3866 #undef __FUNCT__
3867 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3868 /*@
3869      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3870          CSR format the local rows.
3871 
3872    Collective on MPI_Comm
3873 
3874    Input Parameters:
3875 +  comm - MPI communicator
3876 .  bs - the block size, only a block size of 1 is supported
3877 .  m - number of local rows (Cannot be PETSC_DECIDE)
3878 .  n - This value should be the same as the local size used in creating the
3879        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3880        calculated if N is given) For square matrices n is almost always m.
3881 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3882 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3883 .   i - row indices
3884 .   j - column indices
3885 -   a - matrix values
3886 
3887    Output Parameter:
3888 .   mat - the matrix
3889 
3890    Level: intermediate
3891 
3892    Notes:
3893        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3894      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3895      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3896 
3897      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3898      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3899      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3900      with column-major ordering within blocks.
3901 
3902        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3903 
3904 .keywords: matrix, aij, compressed row, sparse, parallel
3905 
3906 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3907           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3908 @*/
3909 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3910 {
3911   PetscErrorCode ierr;
3912 
3913   PetscFunctionBegin;
3914   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3915   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3916   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3917   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3918   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3919   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3920   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3921   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3922   PetscFunctionReturn(0);
3923 }
3924 
3925 #undef __FUNCT__
3926 #define __FUNCT__ "MatCreateMPIBAIJConcatenateSeqBAIJSymbolic"
3927 PetscErrorCode MatCreateMPIBAIJConcatenateSeqBAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
3928 {
3929   PetscErrorCode ierr;
3930   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3931   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
3932   PetscInt       *indx,*bindx,rmax=a->rmax,j;
3933 
3934 
3935   PetscFunctionBegin;
3936   /* This routine will ONLY return MPIBAIJ type matrix */
3937   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3938   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3939   m = m/bs; N = N/cbs;
3940   if (n == PETSC_DECIDE) {
3941     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3942   }
3943   /* Check sum(n) = N */
3944   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3945   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
3946 
3947   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3948   rstart -= m;
3949 
3950   ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3951   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3952   for (i=0; i<m; i++) {
3953     ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3954     nnz = nnz/bs;
3955     for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3956     ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3957     ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3958   }
3959   ierr = PetscFree(bindx);CHKERRQ(ierr);
3960 
3961   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3962   ierr = MatSetSizes(*outmat,m*bs,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3963   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3964   ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
3965   ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3966   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3967   PetscFunctionReturn(0);
3968 }
3969 
3970 #undef __FUNCT__
3971 #define __FUNCT__ "MatCreateMPIBAIJConcatenateSeqBAIJNumeric"
3972 PetscErrorCode MatCreateMPIBAIJConcatenateSeqBAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
3973 {
3974   PetscErrorCode ierr;
3975   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3976   PetscInt       *indx;
3977   PetscScalar    *values;
3978 
3979   PetscFunctionBegin;
3980   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3981    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3982   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
3983 
3984   for (i=0; i<m; i++) {
3985     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3986     Ii   = i + rstart;
3987     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3988     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3989   }
3990   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3991   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3992   PetscFunctionReturn(0);
3993 }
3994 
3995 #undef __FUNCT__
3996 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
3997 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3998 {
3999   PetscErrorCode ierr;
4000   PetscMPIInt    size;
4001 
4002   PetscFunctionBegin;
4003   /* same as MatCreateMPIAIJConcatenateSeqAIJ() */
4004   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4005   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4006   if (size == 1) {
4007     if (scall == MAT_INITIAL_MATRIX) {
4008       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4009     } else {
4010       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4011     }
4012   } else {
4013     if (scall == MAT_INITIAL_MATRIX) {
4014       ierr = MatCreateMPIBAIJConcatenateSeqBAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4015     }
4016     ierr = MatCreateMPIBAIJConcatenateSeqBAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4017   }
4018   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4019   PetscFunctionReturn(0);
4020 }
4021