xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a34163a40bd715823141060f8cdc39ecd9525219)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scalable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some Variables required in the macro */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined(PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col  =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B    = baij->B;
249               b    = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba   =b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray     = baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) stepval = (n-1)*bs;
296   else stepval = (m-1)*bs;
297 
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If NumCol = 1 then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315             value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend) {
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         } else if (in[j] < 0) continue;
328 #if defined(PETSC_USE_DEBUG)
329         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
330 #endif
331         else {
332           if (mat->was_assembled) {
333             if (!baij->colmap) {
334               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
335             }
336 
337 #if defined(PETSC_USE_DEBUG)
338 #if defined(PETSC_USE_CTABLE)
339             { PetscInt data;
340               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
341               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
342             }
343 #else
344             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345 #endif
346 #endif
347 #if defined(PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
349             col  = (col - 1)/bs;
350 #else
351             col = (baij->colmap[in[j]] - 1)/bs;
352 #endif
353             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
354               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355               col  =  in[j];
356             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
357           } else col = in[j];
358           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
359         }
360       }
361     } else {
362       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
363       if (!baij->donotstash) {
364         if (roworiented) {
365           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
366         } else {
367           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
375 #define HASH_KEY 0.6180339887
376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
382 {
383   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
384   PetscBool      roworiented = baij->roworiented;
385   PetscErrorCode ierr;
386   PetscInt       i,j,row,col;
387   PetscInt       rstart_orig=mat->rmap->rstart;
388   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
389   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
390   PetscReal      tmp;
391   MatScalar      **HD = baij->hd,value;
392 #if defined(PETSC_USE_DEBUG)
393   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
394 #endif
395 
396   PetscFunctionBegin;
397   for (i=0; i<m; i++) {
398 #if defined(PETSC_USE_DEBUG)
399     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
400     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
401 #endif
402     row = im[i];
403     if (row >= rstart_orig && row < rend_orig) {
404       for (j=0; j<n; j++) {
405         col = in[j];
406         if (roworiented) value = v[i*n+j];
407         else             value = v[i+j*m];
408         /* Look up PetscInto the Hash Table */
409         key = (row/bs)*Nbs+(col/bs)+1;
410         h1  = HASH(size,key,tmp);
411 
412 
413         idx = h1;
414 #if defined(PETSC_USE_DEBUG)
415         insert_ct++;
416         total_ct++;
417         if (HT[idx] != key) {
418           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
419           if (idx == size) {
420             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
421             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
427           if (idx == size) {
428             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
429             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
430           }
431         }
432 #endif
433         /* A HASH table entry is found, so insert the values at the correct address */
434         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
435         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
436       }
437     } else if (!baij->donotstash) {
438       if (roworiented) {
439         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
440       } else {
441         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
442       }
443     }
444   }
445 #if defined(PETSC_USE_DEBUG)
446   baij->ht_total_ct  = total_ct;
447   baij->ht_insert_ct = insert_ct;
448 #endif
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
454 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
457   PetscBool         roworiented = baij->roworiented;
458   PetscErrorCode    ierr;
459   PetscInt          i,j,ii,jj,row,col;
460   PetscInt          rstart=baij->rstartbs;
461   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
462   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
463   PetscReal         tmp;
464   MatScalar         **HD = baij->hd,*baij_a;
465   const PetscScalar *v_t,*value;
466 #if defined(PETSC_USE_DEBUG)
467   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
468 #endif
469 
470   PetscFunctionBegin;
471   if (roworiented) stepval = (n-1)*bs;
472   else stepval = (m-1)*bs;
473 
474   for (i=0; i<m; i++) {
475 #if defined(PETSC_USE_DEBUG)
476     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
477     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
478 #endif
479     row = im[i];
480     v_t = v + i*nbs2;
481     if (row >= rstart && row < rend) {
482       for (j=0; j<n; j++) {
483         col = in[j];
484 
485         /* Look up into the Hash Table */
486         key = row*Nbs+col+1;
487         h1  = HASH(size,key,tmp);
488 
489         idx = h1;
490 #if defined(PETSC_USE_DEBUG)
491         total_ct++;
492         insert_ct++;
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #else
501         if (HT[idx] != key) {
502           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
503           if (idx == size) {
504             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
505             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
506           }
507         }
508 #endif
509         baij_a = HD[idx];
510         if (roworiented) {
511           /*value = v + i*(stepval+bs)*bs + j*bs;*/
512           /* value = v + (i*(stepval+bs)+j)*bs; */
513           value = v_t;
514           v_t  += bs;
515           if (addv == ADD_VALUES) {
516             for (ii=0; ii<bs; ii++,value+=stepval) {
517               for (jj=ii; jj<bs2; jj+=bs) {
518                 baij_a[jj] += *value++;
519               }
520             }
521           } else {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] = *value++;
525               }
526             }
527           }
528         } else {
529           value = v + j*(stepval+bs)*bs + i*bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
532               for (jj=0; jj<bs; jj++) {
533                 baij_a[jj] += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] = *value++;
540               }
541             }
542           }
543         }
544       }
545     } else {
546       if (!baij->donotstash) {
547         if (roworiented) {
548           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555 #if defined(PETSC_USE_DEBUG)
556   baij->ht_total_ct  = total_ct;
557   baij->ht_insert_ct = insert_ct;
558 #endif
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatGetValues_MPIBAIJ"
564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
565 {
566   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
567   PetscErrorCode ierr;
568   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
569   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
574     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
575     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
576       row = idxm[i] - bsrstart;
577       for (j=0; j<n; j++) {
578         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
579         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
580         if (idxn[j] >= bscstart && idxn[j] < bscend) {
581           col  = idxn[j] - bscstart;
582           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
583         } else {
584           if (!baij->colmap) {
585             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
586           }
587 #if defined(PETSC_USE_CTABLE)
588           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
589           data--;
590 #else
591           data = baij->colmap[idxn[j]/bs]-1;
592 #endif
593           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
594           else {
595             col  = data + idxn[j]%bs;
596             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
597           }
598         }
599       }
600     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatNorm_MPIBAIJ"
607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
608 {
609   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
610   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
611   PetscErrorCode ierr;
612   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
613   PetscReal      sum = 0.0;
614   MatScalar      *v;
615 
616   PetscFunctionBegin;
617   if (baij->size == 1) {
618     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
619   } else {
620     if (type == NORM_FROBENIUS) {
621       v  = amat->a;
622       nz = amat->nz*bs2;
623       for (i=0; i<nz; i++) {
624         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
625       }
626       v  = bmat->a;
627       nz = bmat->nz*bs2;
628       for (i=0; i<nz; i++) {
629         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
630       }
631       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632       *nrm = PetscSqrtReal(*nrm);
633     } else if (type == NORM_1) { /* max column sum */
634       PetscReal *tmp,*tmp2;
635       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
636       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
637       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
638       v    = amat->a; jj = amat->j;
639       for (i=0; i<amat->nz; i++) {
640         for (j=0; j<bs; j++) {
641           col = bs*(cstart + *jj) + j; /* column index */
642           for (row=0; row<bs; row++) {
643             tmp[col] += PetscAbsScalar(*v);  v++;
644           }
645         }
646         jj++;
647       }
648       v = bmat->a; jj = bmat->j;
649       for (i=0; i<bmat->nz; i++) {
650         for (j=0; j<bs; j++) {
651           col = bs*garray[*jj] + j;
652           for (row=0; row<bs; row++) {
653             tmp[col] += PetscAbsScalar(*v); v++;
654           }
655         }
656         jj++;
657       }
658       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
659       *nrm = 0.0;
660       for (j=0; j<mat->cmap->N; j++) {
661         if (tmp2[j] > *nrm) *nrm = tmp2[j];
662       }
663       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
664     } else if (type == NORM_INFINITY) { /* max row sum */
665       PetscReal *sums;
666       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
667       sum  = 0.0;
668       for (j=0; j<amat->mbs; j++) {
669         for (row=0; row<bs; row++) sums[row] = 0.0;
670         v  = amat->a + bs2*amat->i[j];
671         nz = amat->i[j+1]-amat->i[j];
672         for (i=0; i<nz; i++) {
673           for (col=0; col<bs; col++) {
674             for (row=0; row<bs; row++) {
675               sums[row] += PetscAbsScalar(*v); v++;
676             }
677           }
678         }
679         v  = bmat->a + bs2*bmat->i[j];
680         nz = bmat->i[j+1]-bmat->i[j];
681         for (i=0; i<nz; i++) {
682           for (col=0; col<bs; col++) {
683             for (row=0; row<bs; row++) {
684               sums[row] += PetscAbsScalar(*v); v++;
685             }
686           }
687         }
688         for (row=0; row<bs; row++) {
689           if (sums[row] > sum) sum = sums[row];
690         }
691       }
692       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
693       ierr = PetscFree(sums);CHKERRQ(ierr);
694     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
695   }
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   Creates the hash table, and sets the table
701   This table is created only once.
702   If new entried need to be added to the matrix
703   then the hash table has to be destroyed and
704   recreated.
705 */
706 #undef __FUNCT__
707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
709 {
710   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
711   Mat            A     = baij->A,B=baij->B;
712   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
713   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
714   PetscErrorCode ierr;
715   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
716   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
717   PetscInt       *HT,key;
718   MatScalar      **HD;
719   PetscReal      tmp;
720 #if defined(PETSC_USE_INFO)
721   PetscInt ct=0,max=0;
722 #endif
723 
724   PetscFunctionBegin;
725   if (baij->ht) PetscFunctionReturn(0);
726 
727   baij->ht_size = (PetscInt)(factor*nz);
728   ht_size       = baij->ht_size;
729 
730   /* Allocate Memory for Hash Table */
731   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
732   HD   = baij->hd;
733   HT   = baij->ht;
734 
735   /* Loop Over A */
736   for (i=0; i<a->mbs; i++) {
737     for (j=ai[i]; j<ai[i+1]; j++) {
738       row = i+rstart;
739       col = aj[j]+cstart;
740 
741       key = row*Nbs + col + 1;
742       h1  = HASH(ht_size,key,tmp);
743       for (k=0; k<ht_size; k++) {
744         if (!HT[(h1+k)%ht_size]) {
745           HT[(h1+k)%ht_size] = key;
746           HD[(h1+k)%ht_size] = a->a + j*bs2;
747           break;
748 #if defined(PETSC_USE_INFO)
749         } else {
750           ct++;
751 #endif
752         }
753       }
754 #if defined(PETSC_USE_INFO)
755       if (k> max) max = k;
756 #endif
757     }
758   }
759   /* Loop Over B */
760   for (i=0; i<b->mbs; i++) {
761     for (j=bi[i]; j<bi[i+1]; j++) {
762       row = i+rstart;
763       col = garray[bj[j]];
764       key = row*Nbs + col + 1;
765       h1  = HASH(ht_size,key,tmp);
766       for (k=0; k<ht_size; k++) {
767         if (!HT[(h1+k)%ht_size]) {
768           HT[(h1+k)%ht_size] = key;
769           HD[(h1+k)%ht_size] = b->a + j*bs2;
770           break;
771 #if defined(PETSC_USE_INFO)
772         } else {
773           ct++;
774 #endif
775         }
776       }
777 #if defined(PETSC_USE_INFO)
778       if (k> max) max = k;
779 #endif
780     }
781   }
782 
783   /* Print Summary */
784 #if defined(PETSC_USE_INFO)
785   for (i=0,j=0; i<ht_size; i++) {
786     if (HT[i]) j++;
787   }
788   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
789 #endif
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
795 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800   InsertMode     addv;
801 
802   PetscFunctionBegin;
803   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
804 
805   /* make sure all processors are either in INSERTMODE or ADDMODE */
806   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
808   mat->insertmode = addv; /* in case this processor had no cache */
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
812   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
813   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
821 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
822 {
823   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
824   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
825   PetscErrorCode ierr;
826   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
827   PetscInt       *row,*col;
828   PetscBool      r1,r2,r3,other_disassembled;
829   MatScalar      *val;
830   InsertMode     addv = mat->insertmode;
831   PetscMPIInt    n;
832 
833   PetscFunctionBegin;
834   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
835   if (!baij->donotstash && !mat->nooffprocentries) {
836     while (1) {
837       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
838       if (!flg) break;
839 
840       for (i=0; i<n;) {
841         /* Now identify the consecutive vals belonging to the same row */
842         for (j=i,rstart=row[j]; j<n; j++) {
843           if (row[j] != rstart) break;
844         }
845         if (j < n) ncols = j-i;
846         else       ncols = n-i;
847         /* Now assemble all these values with a single function call */
848         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
849         i    = j;
850       }
851     }
852     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
853     /* Now process the block-stash. Since the values are stashed column-oriented,
854        set the roworiented flag to column oriented, and after MatSetValues()
855        restore the original flags */
856     r1 = baij->roworiented;
857     r2 = a->roworiented;
858     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
859 
860     baij->roworiented = PETSC_FALSE;
861     a->roworiented    = PETSC_FALSE;
862 
863     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
864     while (1) {
865       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
866       if (!flg) break;
867 
868       for (i=0; i<n;) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for (j=i,rstart=row[j]; j<n; j++) {
871           if (row[j] != rstart) break;
872         }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
876         i    = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
880 
881     baij->roworiented = r1;
882     a->roworiented    = r2;
883 
884     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
885   }
886 
887   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
888   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
889 
890   /* determine if any processor has disassembled, if so we must
891      also disassemble ourselfs, in order that we may reassemble. */
892   /*
893      if nonzero structure of submatrix B cannot change then we know that
894      no processor disassembled thus we can skip this stuff
895   */
896   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
897     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
898     if (mat->was_assembled && !other_disassembled) {
899       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
900     }
901   }
902 
903   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
904     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
905   }
906   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
907   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
908 
909 #if defined(PETSC_USE_INFO)
910   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
911     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
912 
913     baij->ht_total_ct  = 0;
914     baij->ht_insert_ct = 0;
915   }
916 #endif
917   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
918     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
919 
920     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
921     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
922   }
923 
924   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
925 
926   baij->rowvalues = 0;
927 
928   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
929   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
930     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
931     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
937 #include <petscdraw.h>
938 #undef __FUNCT__
939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
941 {
942   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
943   PetscErrorCode    ierr;
944   PetscMPIInt       rank = baij->rank;
945   PetscInt          bs   = mat->rmap->bs;
946   PetscBool         iascii,isdraw;
947   PetscViewer       sviewer;
948   PetscViewerFormat format;
949 
950   PetscFunctionBegin;
951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
953   if (iascii) {
954     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
955     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
956       MatInfo info;
957       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
958       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
961                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
964       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
966       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
967       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
968       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
969       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     } else if (format == PETSC_VIEWER_ASCII_INFO) {
972       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
973       PetscFunctionReturn(0);
974     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
975       PetscFunctionReturn(0);
976     }
977   }
978 
979   if (isdraw) {
980     PetscDraw draw;
981     PetscBool isnull;
982     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
983     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
984   }
985 
986   {
987     /* assemble the entire matrix onto first processor. */
988     Mat         A;
989     Mat_SeqBAIJ *Aloc;
990     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
991     MatScalar   *a;
992 
993     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
994     /* Perhaps this should be the type of mat? */
995     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
998     } else {
999       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1000     }
1001     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1002     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1003     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1004     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1005 
1006     /* copy over the A part */
1007     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1008     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1009     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1010 
1011     for (i=0; i<mbs; i++) {
1012       rvals[0] = bs*(baij->rstartbs + i);
1013       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1014       for (j=ai[i]; j<ai[i+1]; j++) {
1015         col = (baij->cstartbs+aj[j])*bs;
1016         for (k=0; k<bs; k++) {
1017           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1018           col++; a += bs;
1019         }
1020       }
1021     }
1022     /* copy over the B part */
1023     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1024     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = baij->garray[aj[j]]*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     ierr = PetscFree(rvals);CHKERRQ(ierr);
1037     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     /*
1040        Everyone has to call to draw the matrix since the graphics waits are
1041        synchronized across all processors that share the PetscDraw object
1042     */
1043     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1044     if (!rank) {
1045       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1046     }
1047     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1048     ierr = MatDestroy(&A);CHKERRQ(ierr);
1049   }
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1055 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1056 {
1057   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1058   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1059   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1060   PetscErrorCode ierr;
1061   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1062   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1063   int            fd;
1064   PetscScalar    *column_values;
1065   FILE           *file;
1066   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1067   PetscInt       message_count,flowcontrolcount;
1068 
1069   PetscFunctionBegin;
1070   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1071   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1072   nz   = bs2*(A->nz + B->nz);
1073   rlen = mat->rmap->n;
1074   if (!rank) {
1075     header[0] = MAT_FILE_CLASSID;
1076     header[1] = mat->rmap->N;
1077     header[2] = mat->cmap->N;
1078 
1079     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1080     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1081     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1082     /* get largest number of rows any processor has */
1083     range = mat->rmap->range;
1084     for (i=1; i<size; i++) {
1085       rlen = PetscMax(rlen,range[i+1] - range[i]);
1086     }
1087   } else {
1088     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1089   }
1090 
1091   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1092   /* compute lengths of each row  */
1093   for (i=0; i<a->mbs; i++) {
1094     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1095   }
1096   /* store the row lengths to the file */
1097   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1098   if (!rank) {
1099     MPI_Status status;
1100     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1101     rlen = (range[1] - range[0])/bs;
1102     for (i=0; i<rlen; i++) {
1103       for (j=0; j<bs; j++) {
1104         row_lens[i*bs+j] = bs*crow_lens[i];
1105       }
1106     }
1107     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1108     for (i=1; i<size; i++) {
1109       rlen = (range[i+1] - range[i])/bs;
1110       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1111       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1112       for (k=0; k<rlen; k++) {
1113         for (j=0; j<bs; j++) {
1114           row_lens[k*bs+j] = bs*crow_lens[k];
1115         }
1116       }
1117       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1118     }
1119     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1120     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1121   } else {
1122     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1123     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1124     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1125   }
1126   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1127 
1128   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1129      information needed to make it for each row from a block row. This does require more communication but still not more than
1130      the communication needed for the nonzero values  */
1131   nzmax = nz; /*  space a largest processor needs */
1132   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1133   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1134   cnt   = 0;
1135   for (i=0; i<a->mbs; i++) {
1136     pcnt = cnt;
1137     for (j=B->i[i]; j<B->i[i+1]; j++) {
1138       if ((col = garray[B->j[j]]) > cstart) break;
1139       for (l=0; l<bs; l++) {
1140         column_indices[cnt++] = bs*col+l;
1141       }
1142     }
1143     for (k=A->i[i]; k<A->i[i+1]; k++) {
1144       for (l=0; l<bs; l++) {
1145         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1146       }
1147     }
1148     for (; j<B->i[i+1]; j++) {
1149       for (l=0; l<bs; l++) {
1150         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1151       }
1152     }
1153     len = cnt - pcnt;
1154     for (k=1; k<bs; k++) {
1155       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1156       cnt += len;
1157     }
1158   }
1159   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1160 
1161   /* store the columns to the file */
1162   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1163   if (!rank) {
1164     MPI_Status status;
1165     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166     for (i=1; i<size; i++) {
1167       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1168       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1169       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1170       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     }
1172     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1173   } else {
1174     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1175     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1176     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1177     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1178   }
1179   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1180 
1181   /* load up the numerical values */
1182   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1183   cnt  = 0;
1184   for (i=0; i<a->mbs; i++) {
1185     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1186     for (j=B->i[i]; j<B->i[i+1]; j++) {
1187       if (garray[B->j[j]] > cstart) break;
1188       for (l=0; l<bs; l++) {
1189         for (ll=0; ll<bs; ll++) {
1190           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1191         }
1192       }
1193       cnt += bs;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) {
1196       for (l=0; l<bs; l++) {
1197         for (ll=0; ll<bs; ll++) {
1198           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1199         }
1200       }
1201       cnt += bs;
1202     }
1203     for (; j<B->i[i+1]; j++) {
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     cnt += (bs-1)*rlen;
1212   }
1213   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1214 
1215   /* store the column values to the file */
1216   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1217   if (!rank) {
1218     MPI_Status status;
1219     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1223       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1224       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1225     }
1226     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1227   } else {
1228     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1232   }
1233   ierr = PetscFree(column_values);CHKERRQ(ierr);
1234 
1235   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1236   if (file) {
1237     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatView_MPIBAIJ"
1244 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1245 {
1246   PetscErrorCode ierr;
1247   PetscBool      iascii,isdraw,issocket,isbinary;
1248 
1249   PetscFunctionBegin;
1250   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1254   if (iascii || isdraw || issocket) {
1255     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1256   } else if (isbinary) {
1257     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1264 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1265 {
1266   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270 #if defined(PETSC_USE_LOG)
1271   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1272 #endif
1273   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1274   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1275   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1277 #if defined(PETSC_USE_CTABLE)
1278   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1279 #else
1280   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1281 #endif
1282   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1283   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1284   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1285   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1286   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1287   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1288   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1289   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1290 
1291   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatMult_MPIBAIJ"
1306 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1307 {
1308   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310   PetscInt       nt;
1311 
1312   PetscFunctionBegin;
1313   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1314   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1315   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1316   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1317   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1319   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1334   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1342 {
1343   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345   PetscBool      merged;
1346 
1347   PetscFunctionBegin;
1348   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1349   /* do nondiagonal part */
1350   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1351   if (!merged) {
1352     /* send it on its way */
1353     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354     /* do local part */
1355     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1356     /* receive remote parts: note this assumes the values are not actually */
1357     /* inserted in yy until the next line */
1358     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359   } else {
1360     /* do local part */
1361     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1362     /* send it on its way */
1363     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1364     /* values actually were received in the Begin() but we need to call this nop */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1373 {
1374   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   /* do nondiagonal part */
1379   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1380   /* send it on its way */
1381   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382   /* do local part */
1383   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1384   /* receive remote parts: note this assumes the values are not actually */
1385   /* inserted in yy until the next line, which is true for my implementation*/
1386   /* but is not perhaps always true. */
1387   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 /*
1392   This only works correctly for square matrices where the subblock A->A is the
1393    diagonal block
1394 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1404   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatScale_MPIBAIJ"
1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1417   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1424 {
1425   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1426   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1427   PetscErrorCode ierr;
1428   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1429   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1430   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1431 
1432   PetscFunctionBegin;
1433   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1434   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1435   mat->getrowactive = PETSC_TRUE;
1436 
1437   if (!mat->rowvalues && (idx || v)) {
1438     /*
1439         allocate enough space to hold information from the longest row.
1440     */
1441     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1442     PetscInt    max = 1,mbs = mat->mbs,tmp;
1443     for (i=0; i<mbs; i++) {
1444       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1445       if (max < tmp) max = tmp;
1446     }
1447     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1448   }
1449   lrow = row - brstart;
1450 
1451   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1452   if (!v)   {pvA = 0; pvB = 0;}
1453   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1454   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   nztot = nzA + nzB;
1457 
1458   cmap = mat->garray;
1459   if (v  || idx) {
1460     if (nztot) {
1461       /* Sort by increasing column numbers, assuming A and B already sorted */
1462       PetscInt imark = -1;
1463       if (v) {
1464         *v = v_p = mat->rowvalues;
1465         for (i=0; i<nzB; i++) {
1466           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1467           else break;
1468         }
1469         imark = i;
1470         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1471         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1472       }
1473       if (idx) {
1474         *idx = idx_p = mat->rowindices;
1475         if (imark > -1) {
1476           for (i=0; i<imark; i++) {
1477             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1478           }
1479         } else {
1480           for (i=0; i<nzB; i++) {
1481             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1482             else break;
1483           }
1484           imark = i;
1485         }
1486         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1487         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1488       }
1489     } else {
1490       if (idx) *idx = 0;
1491       if (v)   *v   = 0;
1492     }
1493   }
1494   *nz  = nztot;
1495   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1505 
1506   PetscFunctionBegin;
1507   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1508   baij->getrowactive = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1515 {
1516   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528 {
1529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1530   Mat            A  = a->A,B = a->B;
1531   PetscErrorCode ierr;
1532   PetscReal      isend[5],irecv[5];
1533 
1534   PetscFunctionBegin;
1535   info->block_size = (PetscReal)matin->rmap->bs;
1536 
1537   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1538 
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541 
1542   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1543 
1544   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1545   isend[3] += info->memory;  isend[4] += info->mallocs;
1546 
1547   if (flag == MAT_LOCAL) {
1548     info->nz_used      = isend[0];
1549     info->nz_allocated = isend[1];
1550     info->nz_unneeded  = isend[2];
1551     info->memory       = isend[3];
1552     info->mallocs      = isend[4];
1553   } else if (flag == MAT_GLOBAL_MAX) {
1554     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1555 
1556     info->nz_used      = irecv[0];
1557     info->nz_allocated = irecv[1];
1558     info->nz_unneeded  = irecv[2];
1559     info->memory       = irecv[3];
1560     info->mallocs      = irecv[4];
1561   } else if (flag == MAT_GLOBAL_SUM) {
1562     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1563 
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1570   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1571   info->fill_ratio_needed = 0;
1572   info->factor_mallocs    = 0;
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1579 {
1580   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   switch (op) {
1585   case MAT_NEW_NONZERO_LOCATIONS:
1586   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1587   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1588   case MAT_KEEP_NONZERO_PATTERN:
1589   case MAT_NEW_NONZERO_LOCATION_ERR:
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_ROW_ORIENTED:
1594     a->roworiented = flg;
1595 
1596     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1597     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1598     break;
1599   case MAT_NEW_DIAGONALS:
1600     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1601     break;
1602   case MAT_IGNORE_OFF_PROC_ENTRIES:
1603     a->donotstash = flg;
1604     break;
1605   case MAT_USE_HASH_TABLE:
1606     a->ht_flag = flg;
1607     break;
1608   case MAT_SYMMETRIC:
1609   case MAT_STRUCTURALLY_SYMMETRIC:
1610   case MAT_HERMITIAN:
1611   case MAT_SYMMETRY_ETERNAL:
1612     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1613     break;
1614   default:
1615     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1623 {
1624   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1625   Mat_SeqBAIJ    *Aloc;
1626   Mat            B;
1627   PetscErrorCode ierr;
1628   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1629   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1630   MatScalar      *a;
1631 
1632   PetscFunctionBegin;
1633   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1634   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1635     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1636     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1637     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1638     /* Do not know preallocation information, but must set block size */
1639     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1640   } else {
1641     B = *matout;
1642   }
1643 
1644   /* copy over the A part */
1645   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1646   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1647   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1648 
1649   for (i=0; i<mbs; i++) {
1650     rvals[0] = bs*(baij->rstartbs + i);
1651     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1652     for (j=ai[i]; j<ai[i+1]; j++) {
1653       col = (baij->cstartbs+aj[j])*bs;
1654       for (k=0; k<bs; k++) {
1655         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1656 
1657         col++; a += bs;
1658       }
1659     }
1660   }
1661   /* copy over the B part */
1662   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1663   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1664   for (i=0; i<mbs; i++) {
1665     rvals[0] = bs*(baij->rstartbs + i);
1666     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1667     for (j=ai[i]; j<ai[i+1]; j++) {
1668       col = baij->garray[aj[j]]*bs;
1669       for (k=0; k<bs; k++) {
1670         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1671         col++;
1672         a += bs;
1673       }
1674     }
1675   }
1676   ierr = PetscFree(rvals);CHKERRQ(ierr);
1677   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1678   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679 
1680   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1681   else {
1682     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1690 {
1691   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1692   Mat            a     = baij->A,b = baij->B;
1693   PetscErrorCode ierr;
1694   PetscInt       s1,s2,s3;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1698   if (rr) {
1699     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1700     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1701     /* Overlap communication with computation. */
1702     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1703   }
1704   if (ll) {
1705     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1706     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1707     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1708   }
1709   /* scale  the diagonal block */
1710   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1711 
1712   if (rr) {
1713     /* Do a scatter end and then right scale the off-diagonal block */
1714     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1723 {
1724   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1725   PetscInt      *owners = A->rmap->range;
1726   PetscInt       n      = A->rmap->n;
1727   PetscSF        sf;
1728   PetscInt      *lrows;
1729   PetscSFNode   *rrows;
1730   PetscInt       r, p = 0, len = 0;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   /* Create SF where leaves are input rows and roots are owned rows */
1735   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1736   for (r = 0; r < n; ++r) lrows[r] = -1;
1737   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
1738   for (r = 0; r < N; ++r) {
1739     const PetscInt idx   = rows[r];
1740     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1741     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1742       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1743     }
1744     if (A->nooffproczerorows) {
1745       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1746       lrows[len++] = idx - owners[p];
1747     } else {
1748       rrows[r].rank = p;
1749       rrows[r].index = rows[r] - owners[p];
1750     }
1751   }
1752   if (!A->nooffproczerorows) {
1753     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1754     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1755     /* Collect flags for rows to be zeroed */
1756     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1757     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1758     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1759     /* Compress and put in row numbers */
1760     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1761   }
1762   /* fix right hand side if needed */
1763   if (x && b) {
1764     const PetscScalar *xx;
1765     PetscScalar       *bb;
1766 
1767     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1768     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1769     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1770     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1771     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1772   }
1773 
1774   /* actually zap the local rows */
1775   /*
1776         Zero the required rows. If the "diagonal block" of the matrix
1777      is square and the user wishes to set the diagonal we use separate
1778      code so that MatSetValues() is not called for each diagonal allocating
1779      new memory, thus calling lots of mallocs and slowing things down.
1780 
1781   */
1782   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1783   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1784   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1785     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1786   } else if (diag != 0.0) {
1787     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1788     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1789        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1790     for (r = 0; r < len; ++r) {
1791       const PetscInt row = lrows[r] + A->rmap->rstart;
1792       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1793     }
1794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   } else {
1797     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1798   }
1799   ierr = PetscFree(lrows);CHKERRQ(ierr);
1800 
1801   /* only change matrix nonzero state if pattern was allowed to be changed */
1802   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1803     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1804     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1805   }
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1811 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1812 {
1813   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1814   PetscErrorCode    ierr;
1815   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
1816   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1817   PetscInt          *lrows,*owners = A->rmap->range;
1818   PetscSFNode       *rrows;
1819   PetscSF           sf;
1820   const PetscScalar *xx;
1821   PetscScalar       *bb,*mask;
1822   Vec               xmask,lmask;
1823   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1824   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1825   PetscScalar       *aa;
1826 #if defined(PETSC_DEBUG)
1827   PetscBool found = PETSC_FALSE;
1828 #endif
1829 
1830   PetscFunctionBegin;
1831   /* Create SF where leaves are input rows and roots are owned rows */
1832   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1833   for (r = 0; r < n; ++r) lrows[r] = -1;
1834   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1835   for (r = 0; r < N; ++r) {
1836     const PetscInt idx   = rows[r];
1837     PetscBool      found = PETSC_FALSE;
1838     /* Trick for efficient searching for sorted rows */
1839     if (lastidx > idx) p = 0;
1840     lastidx = idx;
1841     for (; p < size; ++p) {
1842       if (idx >= owners[p] && idx < owners[p+1]) {
1843         rrows[r].rank  = p;
1844         rrows[r].index = rows[r] - owners[p];
1845         found = PETSC_TRUE;
1846         break;
1847       }
1848     }
1849     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1850   }
1851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1853   /* Collect flags for rows to be zeroed */
1854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1857   /* Compress and put in row numbers */
1858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1859   /* zero diagonal part of matrix */
1860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1861   /* handle off diagonal part of matrix */
1862   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1870   if (x) {
1871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1875   }
1876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1877   /* remove zeroed rows of off diagonal matrix */
1878   for (i = 0; i < len; ++i) {
1879     row   = lrows[i];
1880     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1881     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1882     for (k = 0; k < count; ++k) {
1883       aa[0] = 0.0;
1884       aa   += bs;
1885     }
1886   }
1887   /* loop over all elements of off process part of matrix zeroing removed columns*/
1888   for (i = 0; i < l->B->rmap->N; ++i) {
1889     row = i/bs;
1890     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1891       for (k = 0; k < bs; ++k) {
1892         col = bs*baij->j[j] + k;
1893         if (PetscAbsScalar(mask[col])) {
1894           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1895           if (b) bb[i] -= aa[0]*xx[col];
1896           aa[0] = 0.0;
1897         }
1898       }
1899     }
1900   }
1901   if (x) {
1902     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1903     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1904   }
1905   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1906   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1907   ierr = PetscFree(lrows);CHKERRQ(ierr);
1908 
1909   /* only change matrix nonzero state if pattern was allowed to be changed */
1910   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1911     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1912     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1919 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1920 {
1921   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatEqual_MPIBAIJ"
1933 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1934 {
1935   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1936   Mat            a,b,c,d;
1937   PetscBool      flg;
1938   PetscErrorCode ierr;
1939 
1940   PetscFunctionBegin;
1941   a = matA->A; b = matA->B;
1942   c = matB->A; d = matB->B;
1943 
1944   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1945   if (flg) {
1946     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1947   }
1948   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatCopy_MPIBAIJ"
1954 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1955 {
1956   PetscErrorCode ierr;
1957   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1958   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1959 
1960   PetscFunctionBegin;
1961   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1962   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1963     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1964   } else {
1965     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1966     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1967   }
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1973 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1974 {
1975   PetscErrorCode ierr;
1976 
1977   PetscFunctionBegin;
1978   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
1984 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1985 {
1986   PetscErrorCode ierr;
1987   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1988   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1989   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1990 
1991   PetscFunctionBegin;
1992   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1998 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1999 {
2000   PetscErrorCode ierr;
2001   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
2002   PetscBLASInt   bnz,one=1;
2003   Mat_SeqBAIJ    *x,*y;
2004 
2005   PetscFunctionBegin;
2006   if (str == SAME_NONZERO_PATTERN) {
2007     PetscScalar alpha = a;
2008     x    = (Mat_SeqBAIJ*)xx->A->data;
2009     y    = (Mat_SeqBAIJ*)yy->A->data;
2010     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2011     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2012     x    = (Mat_SeqBAIJ*)xx->B->data;
2013     y    = (Mat_SeqBAIJ*)yy->B->data;
2014     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2015     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2016     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2017   } else {
2018     Mat      B;
2019     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2020     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2021     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2022     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2023     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2024     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2025     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2026     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2027     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2028     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2029     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2030     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2031     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2032     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2033     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2034     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2035   }
2036   PetscFunctionReturn(0);
2037 }
2038 
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2041 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2042 {
2043   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2044   PetscErrorCode ierr;
2045 
2046   PetscFunctionBegin;
2047   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2048   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2054 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2055 {
2056   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2057   PetscErrorCode ierr;
2058 
2059   PetscFunctionBegin;
2060   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2061   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2067 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2068 {
2069   PetscErrorCode ierr;
2070   IS             iscol_local;
2071   PetscInt       csize;
2072 
2073   PetscFunctionBegin;
2074   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2075   if (call == MAT_REUSE_MATRIX) {
2076     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2077     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2078   } else {
2079     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2080   }
2081   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2082   if (call == MAT_INITIAL_MATRIX) {
2083     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2084     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2085   }
2086   PetscFunctionReturn(0);
2087 }
2088 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2091 /*
2092   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2093   in local and then by concatenating the local matrices the end result.
2094   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2095 */
2096 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2097 {
2098   PetscErrorCode ierr;
2099   PetscMPIInt    rank,size;
2100   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2101   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2102   Mat            M,Mreuse;
2103   MatScalar      *vwork,*aa;
2104   MPI_Comm       comm;
2105   IS             isrow_new, iscol_new;
2106   PetscBool      idflag,allrows, allcols;
2107   Mat_SeqBAIJ    *aij;
2108 
2109   PetscFunctionBegin;
2110   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2111   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2112   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2113   /* The compression and expansion should be avoided. Doesn't point
2114      out errors, might change the indices, hence buggey */
2115   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2116   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2117 
2118   /* Check for special case: each processor gets entire matrix columns */
2119   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2120   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2121   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2122   else allcols = PETSC_FALSE;
2123 
2124   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2125   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2126   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2127   else allrows = PETSC_FALSE;
2128 
2129   if (call ==  MAT_REUSE_MATRIX) {
2130     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2131     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2132     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2133   } else {
2134     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2135   }
2136   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2137   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2138   /*
2139       m - number of local rows
2140       n - number of columns (same on all processors)
2141       rstart - first row in new global matrix generated
2142   */
2143   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2144   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2145   m    = m/bs;
2146   n    = n/bs;
2147 
2148   if (call == MAT_INITIAL_MATRIX) {
2149     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2150     ii  = aij->i;
2151     jj  = aij->j;
2152 
2153     /*
2154         Determine the number of non-zeros in the diagonal and off-diagonal
2155         portions of the matrix in order to do correct preallocation
2156     */
2157 
2158     /* first get start and end of "diagonal" columns */
2159     if (csize == PETSC_DECIDE) {
2160       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2161       if (mglobal == n*bs) { /* square matrix */
2162         nlocal = m;
2163       } else {
2164         nlocal = n/size + ((n % size) > rank);
2165       }
2166     } else {
2167       nlocal = csize/bs;
2168     }
2169     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2170     rstart = rend - nlocal;
2171     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2172 
2173     /* next, compute all the lengths */
2174     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2175     for (i=0; i<m; i++) {
2176       jend = ii[i+1] - ii[i];
2177       olen = 0;
2178       dlen = 0;
2179       for (j=0; j<jend; j++) {
2180         if (*jj < rstart || *jj >= rend) olen++;
2181         else dlen++;
2182         jj++;
2183       }
2184       olens[i] = olen;
2185       dlens[i] = dlen;
2186     }
2187     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2188     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2189     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2190     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2191     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2192   } else {
2193     PetscInt ml,nl;
2194 
2195     M    = *newmat;
2196     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2197     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2198     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2199     /*
2200          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2201        rather than the slower MatSetValues().
2202     */
2203     M->was_assembled = PETSC_TRUE;
2204     M->assembled     = PETSC_FALSE;
2205   }
2206   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2207   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2208   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2209   ii   = aij->i;
2210   jj   = aij->j;
2211   aa   = aij->a;
2212   for (i=0; i<m; i++) {
2213     row   = rstart/bs + i;
2214     nz    = ii[i+1] - ii[i];
2215     cwork = jj;     jj += nz;
2216     vwork = aa;     aa += nz*bs*bs;
2217     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2218   }
2219 
2220   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2221   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2222   *newmat = M;
2223 
2224   /* save submatrix used in processor for next request */
2225   if (call ==  MAT_INITIAL_MATRIX) {
2226     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2227     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatPermute_MPIBAIJ"
2234 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2235 {
2236   MPI_Comm       comm,pcomm;
2237   PetscInt       clocal_size,nrows;
2238   const PetscInt *rows;
2239   PetscMPIInt    size;
2240   IS             crowp,lcolp;
2241   PetscErrorCode ierr;
2242 
2243   PetscFunctionBegin;
2244   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2245   /* make a collective version of 'rowp' */
2246   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2247   if (pcomm==comm) {
2248     crowp = rowp;
2249   } else {
2250     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2251     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2252     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2253     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2254   }
2255   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2256   /* make a local version of 'colp' */
2257   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2258   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2259   if (size==1) {
2260     lcolp = colp;
2261   } else {
2262     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2263   }
2264   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2265   /* now we just get the submatrix */
2266   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2267   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2268   /* clean up */
2269   if (pcomm!=comm) {
2270     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2271   }
2272   if (size>1) {
2273     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2274   }
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2280 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2281 {
2282   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2283   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2284 
2285   PetscFunctionBegin;
2286   if (nghosts) *nghosts = B->nbs;
2287   if (ghosts) *ghosts = baij->garray;
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2293 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2294 {
2295   Mat            B;
2296   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2297   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2298   Mat_SeqAIJ     *b;
2299   PetscErrorCode ierr;
2300   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2301   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2302   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2303 
2304   PetscFunctionBegin;
2305   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2306   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2307 
2308   /* ----------------------------------------------------------------
2309      Tell every processor the number of nonzeros per row
2310   */
2311   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2312   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2313     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2314   }
2315   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2316   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2317   displs    = recvcounts + size;
2318   for (i=0; i<size; i++) {
2319     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2320     displs[i]     = A->rmap->range[i]/bs;
2321   }
2322 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2323   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2324 #else
2325   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2326 #endif
2327   /* ---------------------------------------------------------------
2328      Create the sequential matrix of the same type as the local block diagonal
2329   */
2330   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2331   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2332   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2333   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2334   b    = (Mat_SeqAIJ*)B->data;
2335 
2336   /*--------------------------------------------------------------------
2337     Copy my part of matrix column indices over
2338   */
2339   sendcount  = ad->nz + bd->nz;
2340   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2341   a_jsendbuf = ad->j;
2342   b_jsendbuf = bd->j;
2343   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2344   cnt        = 0;
2345   for (i=0; i<n; i++) {
2346 
2347     /* put in lower diagonal portion */
2348     m = bd->i[i+1] - bd->i[i];
2349     while (m > 0) {
2350       /* is it above diagonal (in bd (compressed) numbering) */
2351       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2352       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2353       m--;
2354     }
2355 
2356     /* put in diagonal portion */
2357     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2358       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2359     }
2360 
2361     /* put in upper diagonal portion */
2362     while (m-- > 0) {
2363       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2364     }
2365   }
2366   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2367 
2368   /*--------------------------------------------------------------------
2369     Gather all column indices to all processors
2370   */
2371   for (i=0; i<size; i++) {
2372     recvcounts[i] = 0;
2373     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2374       recvcounts[i] += lens[j];
2375     }
2376   }
2377   displs[0] = 0;
2378   for (i=1; i<size; i++) {
2379     displs[i] = displs[i-1] + recvcounts[i-1];
2380   }
2381 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2382   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2383 #else
2384   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2385 #endif
2386   /*--------------------------------------------------------------------
2387     Assemble the matrix into useable form (note numerical values not yet set)
2388   */
2389   /* set the b->ilen (length of each row) values */
2390   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2391   /* set the b->i indices */
2392   b->i[0] = 0;
2393   for (i=1; i<=A->rmap->N/bs; i++) {
2394     b->i[i] = b->i[i-1] + lens[i-1];
2395   }
2396   ierr = PetscFree(lens);CHKERRQ(ierr);
2397   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2398   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2399   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2400 
2401   if (A->symmetric) {
2402     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2403   } else if (A->hermitian) {
2404     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2405   } else if (A->structurally_symmetric) {
2406     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2407   }
2408   *newmat = B;
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatSOR_MPIBAIJ"
2414 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2415 {
2416   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2417   PetscErrorCode ierr;
2418   Vec            bb1 = 0;
2419 
2420   PetscFunctionBegin;
2421   if (flag == SOR_APPLY_UPPER) {
2422     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2423     PetscFunctionReturn(0);
2424   }
2425 
2426   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2427     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2428   }
2429 
2430   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2431     if (flag & SOR_ZERO_INITIAL_GUESS) {
2432       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2433       its--;
2434     }
2435 
2436     while (its--) {
2437       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2438       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2439 
2440       /* update rhs: bb1 = bb - B*x */
2441       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2442       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2443 
2444       /* local sweep */
2445       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2446     }
2447   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2448     if (flag & SOR_ZERO_INITIAL_GUESS) {
2449       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2450       its--;
2451     }
2452     while (its--) {
2453       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2454       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2455 
2456       /* update rhs: bb1 = bb - B*x */
2457       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2458       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2459 
2460       /* local sweep */
2461       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2462     }
2463   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2464     if (flag & SOR_ZERO_INITIAL_GUESS) {
2465       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2466       its--;
2467     }
2468     while (its--) {
2469       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2470       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2471 
2472       /* update rhs: bb1 = bb - B*x */
2473       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2474       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2475 
2476       /* local sweep */
2477       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2478     }
2479   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2480 
2481   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2487 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2488 {
2489   PetscErrorCode ierr;
2490   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2491   PetscInt       N,i,*garray = aij->garray;
2492   PetscInt       ib,jb,bs = A->rmap->bs;
2493   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2494   MatScalar      *a_val = a_aij->a;
2495   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2496   MatScalar      *b_val = b_aij->a;
2497   PetscReal      *work;
2498 
2499   PetscFunctionBegin;
2500   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2501   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2502   if (type == NORM_2) {
2503     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2504       for (jb=0; jb<bs; jb++) {
2505         for (ib=0; ib<bs; ib++) {
2506           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2507           a_val++;
2508         }
2509       }
2510     }
2511     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2512       for (jb=0; jb<bs; jb++) {
2513         for (ib=0; ib<bs; ib++) {
2514           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2515           b_val++;
2516         }
2517       }
2518     }
2519   } else if (type == NORM_1) {
2520     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2521       for (jb=0; jb<bs; jb++) {
2522         for (ib=0; ib<bs; ib++) {
2523           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2524           a_val++;
2525         }
2526       }
2527     }
2528     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2529       for (jb=0; jb<bs; jb++) {
2530        for (ib=0; ib<bs; ib++) {
2531           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2532           b_val++;
2533         }
2534       }
2535     }
2536   } else if (type == NORM_INFINITY) {
2537     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2538       for (jb=0; jb<bs; jb++) {
2539         for (ib=0; ib<bs; ib++) {
2540           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2541           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2542           a_val++;
2543         }
2544       }
2545     }
2546     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2547       for (jb=0; jb<bs; jb++) {
2548         for (ib=0; ib<bs; ib++) {
2549           int col = garray[b_aij->j[i]] * bs + jb;
2550           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2551           b_val++;
2552         }
2553       }
2554     }
2555   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2556   if (type == NORM_INFINITY) {
2557     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2558   } else {
2559     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2560   }
2561   ierr = PetscFree(work);CHKERRQ(ierr);
2562   if (type == NORM_2) {
2563     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2564   }
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2570 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2571 {
2572   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2573   PetscErrorCode ierr;
2574 
2575   PetscFunctionBegin;
2576   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2577   PetscFunctionReturn(0);
2578 }
2579 
2580 
2581 /* -------------------------------------------------------------------*/
2582 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2583                                        MatGetRow_MPIBAIJ,
2584                                        MatRestoreRow_MPIBAIJ,
2585                                        MatMult_MPIBAIJ,
2586                                 /* 4*/ MatMultAdd_MPIBAIJ,
2587                                        MatMultTranspose_MPIBAIJ,
2588                                        MatMultTransposeAdd_MPIBAIJ,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                 /*10*/ 0,
2593                                        0,
2594                                        0,
2595                                        MatSOR_MPIBAIJ,
2596                                        MatTranspose_MPIBAIJ,
2597                                 /*15*/ MatGetInfo_MPIBAIJ,
2598                                        MatEqual_MPIBAIJ,
2599                                        MatGetDiagonal_MPIBAIJ,
2600                                        MatDiagonalScale_MPIBAIJ,
2601                                        MatNorm_MPIBAIJ,
2602                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2603                                        MatAssemblyEnd_MPIBAIJ,
2604                                        MatSetOption_MPIBAIJ,
2605                                        MatZeroEntries_MPIBAIJ,
2606                                 /*24*/ MatZeroRows_MPIBAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                 /*29*/ MatSetUp_MPIBAIJ,
2612                                        0,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                 /*34*/ MatDuplicate_MPIBAIJ,
2617                                        0,
2618                                        0,
2619                                        0,
2620                                        0,
2621                                 /*39*/ MatAXPY_MPIBAIJ,
2622                                        MatGetSubMatrices_MPIBAIJ,
2623                                        MatIncreaseOverlap_MPIBAIJ,
2624                                        MatGetValues_MPIBAIJ,
2625                                        MatCopy_MPIBAIJ,
2626                                 /*44*/ 0,
2627                                        MatScale_MPIBAIJ,
2628                                        0,
2629                                        0,
2630                                        MatZeroRowsColumns_MPIBAIJ,
2631                                 /*49*/ 0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2637                                        0,
2638                                        MatSetUnfactored_MPIBAIJ,
2639                                        MatPermute_MPIBAIJ,
2640                                        MatSetValuesBlocked_MPIBAIJ,
2641                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2642                                        MatDestroy_MPIBAIJ,
2643                                        MatView_MPIBAIJ,
2644                                        0,
2645                                        0,
2646                                 /*64*/ 0,
2647                                        0,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                 /*74*/ 0,
2657                                        MatFDColoringApply_BAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*79*/ 0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        MatLoad_MPIBAIJ,
2666                                 /*84*/ 0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                 /*89*/ 0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                 /*94*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                 /*99*/ 0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                 /*104*/0,
2687                                        MatRealPart_MPIBAIJ,
2688                                        MatImaginaryPart_MPIBAIJ,
2689                                        0,
2690                                        0,
2691                                 /*109*/0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2697                                        0,
2698                                        MatGetGhosts_MPIBAIJ,
2699                                        0,
2700                                        0,
2701                                 /*119*/0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        MatGetMultiProcBlock_MPIBAIJ,
2706                                 /*124*/0,
2707                                        MatGetColumnNorms_MPIBAIJ,
2708                                        MatInvertBlockDiagonal_MPIBAIJ,
2709                                        0,
2710                                        0,
2711                                /*129*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                /*134*/ 0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                /*139*/ 0,
2722                                        0,
2723                                        0,
2724                                        MatFDColoringSetUp_MPIXAIJ
2725 };
2726 
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2729 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2730 {
2731   PetscFunctionBegin;
2732   *a = ((Mat_MPIBAIJ*)A->data)->A;
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2740 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2741 {
2742   PetscInt       m,rstart,cstart,cend;
2743   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2744   const PetscInt *JJ    =0;
2745   PetscScalar    *values=0;
2746   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2751   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2752   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2753   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2754   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2755   m      = B->rmap->n/bs;
2756   rstart = B->rmap->rstart/bs;
2757   cstart = B->cmap->rstart/bs;
2758   cend   = B->cmap->rend/bs;
2759 
2760   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2761   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2762   for (i=0; i<m; i++) {
2763     nz = ii[i+1] - ii[i];
2764     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2765     nz_max = PetscMax(nz_max,nz);
2766     JJ     = jj + ii[i];
2767     for (j=0; j<nz; j++) {
2768       if (*JJ >= cstart) break;
2769       JJ++;
2770     }
2771     d = 0;
2772     for (; j<nz; j++) {
2773       if (*JJ++ >= cend) break;
2774       d++;
2775     }
2776     d_nnz[i] = d;
2777     o_nnz[i] = nz - d;
2778   }
2779   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2780   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2781 
2782   values = (PetscScalar*)V;
2783   if (!values) {
2784     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2785     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2786   }
2787   for (i=0; i<m; i++) {
2788     PetscInt          row    = i + rstart;
2789     PetscInt          ncols  = ii[i+1] - ii[i];
2790     const PetscInt    *icols = jj + ii[i];
2791     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2792       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2793       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2794     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2795       PetscInt j;
2796       for (j=0; j<ncols; j++) {
2797         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2798         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2799       }
2800     }
2801   }
2802 
2803   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2804   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2805   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2806   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #undef __FUNCT__
2811 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2812 /*@C
2813    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2814    (the default parallel PETSc format).
2815 
2816    Collective on MPI_Comm
2817 
2818    Input Parameters:
2819 +  B - the matrix
2820 .  bs - the block size
2821 .  i - the indices into j for the start of each local row (starts with zero)
2822 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2823 -  v - optional values in the matrix
2824 
2825    Level: developer
2826 
2827    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2828    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2829    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2830    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2831    block column and the second index is over columns within a block.
2832 
2833 .keywords: matrix, aij, compressed row, sparse, parallel
2834 
2835 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2836 @*/
2837 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2838 {
2839   PetscErrorCode ierr;
2840 
2841   PetscFunctionBegin;
2842   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2843   PetscValidType(B,1);
2844   PetscValidLogicalCollectiveInt(B,bs,2);
2845   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2851 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2852 {
2853   Mat_MPIBAIJ    *b;
2854   PetscErrorCode ierr;
2855   PetscInt       i;
2856 
2857   PetscFunctionBegin;
2858   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2859   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2860   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2861   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2862 
2863   if (d_nnz) {
2864     for (i=0; i<B->rmap->n/bs; i++) {
2865       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2866     }
2867   }
2868   if (o_nnz) {
2869     for (i=0; i<B->rmap->n/bs; i++) {
2870       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2871     }
2872   }
2873 
2874   b      = (Mat_MPIBAIJ*)B->data;
2875   b->bs2 = bs*bs;
2876   b->mbs = B->rmap->n/bs;
2877   b->nbs = B->cmap->n/bs;
2878   b->Mbs = B->rmap->N/bs;
2879   b->Nbs = B->cmap->N/bs;
2880 
2881   for (i=0; i<=b->size; i++) {
2882     b->rangebs[i] = B->rmap->range[i]/bs;
2883   }
2884   b->rstartbs = B->rmap->rstart/bs;
2885   b->rendbs   = B->rmap->rend/bs;
2886   b->cstartbs = B->cmap->rstart/bs;
2887   b->cendbs   = B->cmap->rend/bs;
2888 
2889   if (!B->preallocated) {
2890     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2891     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2892     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2893     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2894     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2895     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2896     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2897     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2898     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2899   }
2900 
2901   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2902   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2903   B->preallocated = PETSC_TRUE;
2904   PetscFunctionReturn(0);
2905 }
2906 
2907 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2908 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2909 
2910 #undef __FUNCT__
2911 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2912 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2913 {
2914   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2915   PetscErrorCode ierr;
2916   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2917   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2918   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2919 
2920   PetscFunctionBegin;
2921   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2922   ii[0] = 0;
2923   for (i=0; i<M; i++) {
2924     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2925     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2926     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2927     /* remove one from count of matrix has diagonal */
2928     for (j=id[i]; j<id[i+1]; j++) {
2929       if (jd[j] == i) {ii[i+1]--;break;}
2930     }
2931   }
2932   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2933   cnt  = 0;
2934   for (i=0; i<M; i++) {
2935     for (j=io[i]; j<io[i+1]; j++) {
2936       if (garray[jo[j]] > rstart) break;
2937       jj[cnt++] = garray[jo[j]];
2938     }
2939     for (k=id[i]; k<id[i+1]; k++) {
2940       if (jd[k] != i) {
2941         jj[cnt++] = rstart + jd[k];
2942       }
2943     }
2944     for (; j<io[i+1]; j++) {
2945       jj[cnt++] = garray[jo[j]];
2946     }
2947   }
2948   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2953 
2954 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2958 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2959 {
2960   PetscErrorCode ierr;
2961   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2962   Mat            B;
2963   Mat_MPIAIJ     *b;
2964 
2965   PetscFunctionBegin;
2966   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2967 
2968   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2969   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2970   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2971   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2972   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2973   b    = (Mat_MPIAIJ*) B->data;
2974 
2975   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2976   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2977   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2978   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2979   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2980   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2981   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2982   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2983   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2984   if (reuse == MAT_REUSE_MATRIX) {
2985     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2986   } else {
2987    *newmat = B;
2988   }
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 #if defined(PETSC_HAVE_MUMPS)
2993 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2994 #endif
2995 
2996 /*MC
2997    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2998 
2999    Options Database Keys:
3000 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3001 . -mat_block_size <bs> - set the blocksize used to store the matrix
3002 - -mat_use_hash_table <fact>
3003 
3004   Level: beginner
3005 
3006 .seealso: MatCreateMPIBAIJ
3007 M*/
3008 
3009 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3010 
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatCreate_MPIBAIJ"
3013 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3014 {
3015   Mat_MPIBAIJ    *b;
3016   PetscErrorCode ierr;
3017   PetscBool      flg;
3018 
3019   PetscFunctionBegin;
3020   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3021   B->data = (void*)b;
3022 
3023   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3024   B->assembled = PETSC_FALSE;
3025 
3026   B->insertmode = NOT_SET_VALUES;
3027   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3028   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3029 
3030   /* build local table of row and column ownerships */
3031   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3032 
3033   /* build cache for off array entries formed */
3034   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3035 
3036   b->donotstash  = PETSC_FALSE;
3037   b->colmap      = NULL;
3038   b->garray      = NULL;
3039   b->roworiented = PETSC_TRUE;
3040 
3041   /* stuff used in block assembly */
3042   b->barray = 0;
3043 
3044   /* stuff used for matrix vector multiply */
3045   b->lvec  = 0;
3046   b->Mvctx = 0;
3047 
3048   /* stuff for MatGetRow() */
3049   b->rowindices   = 0;
3050   b->rowvalues    = 0;
3051   b->getrowactive = PETSC_FALSE;
3052 
3053   /* hash table stuff */
3054   b->ht           = 0;
3055   b->hd           = 0;
3056   b->ht_size      = 0;
3057   b->ht_flag      = PETSC_FALSE;
3058   b->ht_fact      = 0;
3059   b->ht_total_ct  = 0;
3060   b->ht_insert_ct = 0;
3061 
3062   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3063   b->ijonly = PETSC_FALSE;
3064 
3065   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3066   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3067   if (flg) {
3068     PetscReal fact = 1.39;
3069     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3070     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3071     if (fact <= 1.0) fact = 1.39;
3072     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3073     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3074   }
3075   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3076 
3077 #if defined(PETSC_HAVE_MUMPS)
3078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3079 #endif
3080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3091   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 /*MC
3096    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3097 
3098    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3099    and MATMPIBAIJ otherwise.
3100 
3101    Options Database Keys:
3102 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3103 
3104   Level: beginner
3105 
3106 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3107 M*/
3108 
3109 #undef __FUNCT__
3110 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3111 /*@C
3112    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3113    (block compressed row).  For good matrix assembly performance
3114    the user should preallocate the matrix storage by setting the parameters
3115    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3116    performance can be increased by more than a factor of 50.
3117 
3118    Collective on Mat
3119 
3120    Input Parameters:
3121 +  B - the matrix
3122 .  bs   - size of block
3123 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3124            submatrix  (same for all local rows)
3125 .  d_nnz - array containing the number of block nonzeros in the various block rows
3126            of the in diagonal portion of the local (possibly different for each block
3127            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3128            set it even if it is zero.
3129 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3130            submatrix (same for all local rows).
3131 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3132            off-diagonal portion of the local submatrix (possibly different for
3133            each block row) or NULL.
3134 
3135    If the *_nnz parameter is given then the *_nz parameter is ignored
3136 
3137    Options Database Keys:
3138 +   -mat_block_size - size of the blocks to use
3139 -   -mat_use_hash_table <fact>
3140 
3141    Notes:
3142    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3143    than it must be used on all processors that share the object for that argument.
3144 
3145    Storage Information:
3146    For a square global matrix we define each processor's diagonal portion
3147    to be its local rows and the corresponding columns (a square submatrix);
3148    each processor's off-diagonal portion encompasses the remainder of the
3149    local matrix (a rectangular submatrix).
3150 
3151    The user can specify preallocated storage for the diagonal part of
3152    the local submatrix with either d_nz or d_nnz (not both).  Set
3153    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3154    memory allocation.  Likewise, specify preallocated storage for the
3155    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3156 
3157    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3158    the figure below we depict these three local rows and all columns (0-11).
3159 
3160 .vb
3161            0 1 2 3 4 5 6 7 8 9 10 11
3162           --------------------------
3163    row 3  |o o o d d d o o o o  o  o
3164    row 4  |o o o d d d o o o o  o  o
3165    row 5  |o o o d d d o o o o  o  o
3166           --------------------------
3167 .ve
3168 
3169    Thus, any entries in the d locations are stored in the d (diagonal)
3170    submatrix, and any entries in the o locations are stored in the
3171    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3172    stored simply in the MATSEQBAIJ format for compressed row storage.
3173 
3174    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3175    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3176    In general, for PDE problems in which most nonzeros are near the diagonal,
3177    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3178    or you will get TERRIBLE performance; see the users' manual chapter on
3179    matrices.
3180 
3181    You can call MatGetInfo() to get information on how effective the preallocation was;
3182    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3183    You can also run with the option -info and look for messages with the string
3184    malloc in them to see if additional memory allocation was needed.
3185 
3186    Level: intermediate
3187 
3188 .keywords: matrix, block, aij, compressed row, sparse, parallel
3189 
3190 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3191 @*/
3192 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3193 {
3194   PetscErrorCode ierr;
3195 
3196   PetscFunctionBegin;
3197   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3198   PetscValidType(B,1);
3199   PetscValidLogicalCollectiveInt(B,bs,2);
3200   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatCreateBAIJ"
3206 /*@C
3207    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3208    (block compressed row).  For good matrix assembly performance
3209    the user should preallocate the matrix storage by setting the parameters
3210    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3211    performance can be increased by more than a factor of 50.
3212 
3213    Collective on MPI_Comm
3214 
3215    Input Parameters:
3216 +  comm - MPI communicator
3217 .  bs   - size of blockk
3218 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3219            This value should be the same as the local size used in creating the
3220            y vector for the matrix-vector product y = Ax.
3221 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3222            This value should be the same as the local size used in creating the
3223            x vector for the matrix-vector product y = Ax.
3224 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3225 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3226 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3227            submatrix  (same for all local rows)
3228 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3229            of the in diagonal portion of the local (possibly different for each block
3230            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3231            and set it even if it is zero.
3232 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3233            submatrix (same for all local rows).
3234 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3235            off-diagonal portion of the local submatrix (possibly different for
3236            each block row) or NULL.
3237 
3238    Output Parameter:
3239 .  A - the matrix
3240 
3241    Options Database Keys:
3242 +   -mat_block_size - size of the blocks to use
3243 -   -mat_use_hash_table <fact>
3244 
3245    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3246    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3247    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3248 
3249    Notes:
3250    If the *_nnz parameter is given then the *_nz parameter is ignored
3251 
3252    A nonzero block is any block that as 1 or more nonzeros in it
3253 
3254    The user MUST specify either the local or global matrix dimensions
3255    (possibly both).
3256 
3257    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3258    than it must be used on all processors that share the object for that argument.
3259 
3260    Storage Information:
3261    For a square global matrix we define each processor's diagonal portion
3262    to be its local rows and the corresponding columns (a square submatrix);
3263    each processor's off-diagonal portion encompasses the remainder of the
3264    local matrix (a rectangular submatrix).
3265 
3266    The user can specify preallocated storage for the diagonal part of
3267    the local submatrix with either d_nz or d_nnz (not both).  Set
3268    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3269    memory allocation.  Likewise, specify preallocated storage for the
3270    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3271 
3272    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3273    the figure below we depict these three local rows and all columns (0-11).
3274 
3275 .vb
3276            0 1 2 3 4 5 6 7 8 9 10 11
3277           --------------------------
3278    row 3  |o o o d d d o o o o  o  o
3279    row 4  |o o o d d d o o o o  o  o
3280    row 5  |o o o d d d o o o o  o  o
3281           --------------------------
3282 .ve
3283 
3284    Thus, any entries in the d locations are stored in the d (diagonal)
3285    submatrix, and any entries in the o locations are stored in the
3286    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3287    stored simply in the MATSEQBAIJ format for compressed row storage.
3288 
3289    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3290    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3291    In general, for PDE problems in which most nonzeros are near the diagonal,
3292    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3293    or you will get TERRIBLE performance; see the users' manual chapter on
3294    matrices.
3295 
3296    Level: intermediate
3297 
3298 .keywords: matrix, block, aij, compressed row, sparse, parallel
3299 
3300 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3301 @*/
3302 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3303 {
3304   PetscErrorCode ierr;
3305   PetscMPIInt    size;
3306 
3307   PetscFunctionBegin;
3308   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3309   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3310   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3311   if (size > 1) {
3312     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3313     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3314   } else {
3315     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3316     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3317   }
3318   PetscFunctionReturn(0);
3319 }
3320 
3321 #undef __FUNCT__
3322 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3323 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3324 {
3325   Mat            mat;
3326   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3327   PetscErrorCode ierr;
3328   PetscInt       len=0;
3329 
3330   PetscFunctionBegin;
3331   *newmat = 0;
3332   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3333   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3334   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3335   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3336 
3337   mat->factortype   = matin->factortype;
3338   mat->preallocated = PETSC_TRUE;
3339   mat->assembled    = PETSC_TRUE;
3340   mat->insertmode   = NOT_SET_VALUES;
3341 
3342   a             = (Mat_MPIBAIJ*)mat->data;
3343   mat->rmap->bs = matin->rmap->bs;
3344   a->bs2        = oldmat->bs2;
3345   a->mbs        = oldmat->mbs;
3346   a->nbs        = oldmat->nbs;
3347   a->Mbs        = oldmat->Mbs;
3348   a->Nbs        = oldmat->Nbs;
3349 
3350   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3351   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3352 
3353   a->size         = oldmat->size;
3354   a->rank         = oldmat->rank;
3355   a->donotstash   = oldmat->donotstash;
3356   a->roworiented  = oldmat->roworiented;
3357   a->rowindices   = 0;
3358   a->rowvalues    = 0;
3359   a->getrowactive = PETSC_FALSE;
3360   a->barray       = 0;
3361   a->rstartbs     = oldmat->rstartbs;
3362   a->rendbs       = oldmat->rendbs;
3363   a->cstartbs     = oldmat->cstartbs;
3364   a->cendbs       = oldmat->cendbs;
3365 
3366   /* hash table stuff */
3367   a->ht           = 0;
3368   a->hd           = 0;
3369   a->ht_size      = 0;
3370   a->ht_flag      = oldmat->ht_flag;
3371   a->ht_fact      = oldmat->ht_fact;
3372   a->ht_total_ct  = 0;
3373   a->ht_insert_ct = 0;
3374 
3375   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3376   if (oldmat->colmap) {
3377 #if defined(PETSC_USE_CTABLE)
3378     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3379 #else
3380     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3381     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3382     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3383 #endif
3384   } else a->colmap = 0;
3385 
3386   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3387     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3388     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3389     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3390   } else a->garray = 0;
3391 
3392   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3393   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3394   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3395   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3396   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3397 
3398   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3399   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3400   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3401   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3402   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3403   *newmat = mat;
3404   PetscFunctionReturn(0);
3405 }
3406 
3407 #undef __FUNCT__
3408 #define __FUNCT__ "MatLoad_MPIBAIJ"
3409 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3410 {
3411   PetscErrorCode ierr;
3412   int            fd;
3413   PetscInt       i,nz,j,rstart,rend;
3414   PetscScalar    *vals,*buf;
3415   MPI_Comm       comm;
3416   MPI_Status     status;
3417   PetscMPIInt    rank,size,maxnz;
3418   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3419   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3420   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3421   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3422   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3423   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3424 
3425   PetscFunctionBegin;
3426   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3427   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3428   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3429   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3430 
3431   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3432   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3433   if (!rank) {
3434     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3435     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3436     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3437   }
3438 
3439   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3440 
3441   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3442   M    = header[1]; N = header[2];
3443 
3444   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3445   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3446   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3447 
3448   /* If global sizes are set, check if they are consistent with that given in the file */
3449   if (sizesset) {
3450     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3451   }
3452   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3453   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3454 
3455   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3456 
3457   /*
3458      This code adds extra rows to make sure the number of rows is
3459      divisible by the blocksize
3460   */
3461   Mbs        = M/bs;
3462   extra_rows = bs - M + bs*Mbs;
3463   if (extra_rows == bs) extra_rows = 0;
3464   else                  Mbs++;
3465   if (extra_rows && !rank) {
3466     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3467   }
3468 
3469   /* determine ownership of all rows */
3470   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3471     mbs = Mbs/size + ((Mbs % size) > rank);
3472     m   = mbs*bs;
3473   } else { /* User set */
3474     m   = newmat->rmap->n;
3475     mbs = m/bs;
3476   }
3477   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3478   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3479 
3480   /* process 0 needs enough room for process with most rows */
3481   if (!rank) {
3482     mmax = rowners[1];
3483     for (i=2; i<=size; i++) {
3484       mmax = PetscMax(mmax,rowners[i]);
3485     }
3486     mmax*=bs;
3487   } else mmax = -1;             /* unused, but compiler warns anyway */
3488 
3489   rowners[0] = 0;
3490   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3491   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3492   rstart = rowners[rank];
3493   rend   = rowners[rank+1];
3494 
3495   /* distribute row lengths to all processors */
3496   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3497   if (!rank) {
3498     mend = m;
3499     if (size == 1) mend = mend - extra_rows;
3500     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3501     for (j=mend; j<m; j++) locrowlens[j] = 1;
3502     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3503     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3504     for (j=0; j<m; j++) {
3505       procsnz[0] += locrowlens[j];
3506     }
3507     for (i=1; i<size; i++) {
3508       mend = browners[i+1] - browners[i];
3509       if (i == size-1) mend = mend - extra_rows;
3510       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3511       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3512       /* calculate the number of nonzeros on each processor */
3513       for (j=0; j<browners[i+1]-browners[i]; j++) {
3514         procsnz[i] += rowlengths[j];
3515       }
3516       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3517     }
3518     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3519   } else {
3520     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3521   }
3522 
3523   if (!rank) {
3524     /* determine max buffer needed and allocate it */
3525     maxnz = procsnz[0];
3526     for (i=1; i<size; i++) {
3527       maxnz = PetscMax(maxnz,procsnz[i]);
3528     }
3529     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3530 
3531     /* read in my part of the matrix column indices  */
3532     nz     = procsnz[0];
3533     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3534     mycols = ibuf;
3535     if (size == 1) nz -= extra_rows;
3536     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3537     if (size == 1) {
3538       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3539     }
3540 
3541     /* read in every ones (except the last) and ship off */
3542     for (i=1; i<size-1; i++) {
3543       nz   = procsnz[i];
3544       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3545       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3546     }
3547     /* read in the stuff for the last proc */
3548     if (size != 1) {
3549       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3550       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3551       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3552       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3553     }
3554     ierr = PetscFree(cols);CHKERRQ(ierr);
3555   } else {
3556     /* determine buffer space needed for message */
3557     nz = 0;
3558     for (i=0; i<m; i++) {
3559       nz += locrowlens[i];
3560     }
3561     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3562     mycols = ibuf;
3563     /* receive message of column indices*/
3564     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3565     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3566     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3567   }
3568 
3569   /* loop over local rows, determining number of off diagonal entries */
3570   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3571   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3572   rowcount = 0; nzcount = 0;
3573   for (i=0; i<mbs; i++) {
3574     dcount  = 0;
3575     odcount = 0;
3576     for (j=0; j<bs; j++) {
3577       kmax = locrowlens[rowcount];
3578       for (k=0; k<kmax; k++) {
3579         tmp = mycols[nzcount++]/bs;
3580         if (!mask[tmp]) {
3581           mask[tmp] = 1;
3582           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3583           else masked1[dcount++] = tmp;
3584         }
3585       }
3586       rowcount++;
3587     }
3588 
3589     dlens[i]  = dcount;
3590     odlens[i] = odcount;
3591 
3592     /* zero out the mask elements we set */
3593     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3594     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3595   }
3596 
3597 
3598   if (!sizesset) {
3599     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3600   }
3601   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3602 
3603   if (!rank) {
3604     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3605     /* read in my part of the matrix numerical values  */
3606     nz     = procsnz[0];
3607     vals   = buf;
3608     mycols = ibuf;
3609     if (size == 1) nz -= extra_rows;
3610     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3611     if (size == 1) {
3612       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3613     }
3614 
3615     /* insert into matrix */
3616     jj = rstart*bs;
3617     for (i=0; i<m; i++) {
3618       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3619       mycols += locrowlens[i];
3620       vals   += locrowlens[i];
3621       jj++;
3622     }
3623     /* read in other processors (except the last one) and ship out */
3624     for (i=1; i<size-1; i++) {
3625       nz   = procsnz[i];
3626       vals = buf;
3627       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3628       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3629     }
3630     /* the last proc */
3631     if (size != 1) {
3632       nz   = procsnz[i] - extra_rows;
3633       vals = buf;
3634       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3635       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3636       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3637     }
3638     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3639   } else {
3640     /* receive numeric values */
3641     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3642 
3643     /* receive message of values*/
3644     vals   = buf;
3645     mycols = ibuf;
3646     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3647 
3648     /* insert into matrix */
3649     jj = rstart*bs;
3650     for (i=0; i<m; i++) {
3651       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3652       mycols += locrowlens[i];
3653       vals   += locrowlens[i];
3654       jj++;
3655     }
3656   }
3657   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3658   ierr = PetscFree(buf);CHKERRQ(ierr);
3659   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3660   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3661   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3662   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3663   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3664   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3665   PetscFunctionReturn(0);
3666 }
3667 
3668 #undef __FUNCT__
3669 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3670 /*@
3671    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3672 
3673    Input Parameters:
3674 .  mat  - the matrix
3675 .  fact - factor
3676 
3677    Not Collective, each process can use a different factor
3678 
3679    Level: advanced
3680 
3681   Notes:
3682    This can also be set by the command line option: -mat_use_hash_table <fact>
3683 
3684 .keywords: matrix, hashtable, factor, HT
3685 
3686 .seealso: MatSetOption()
3687 @*/
3688 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3689 {
3690   PetscErrorCode ierr;
3691 
3692   PetscFunctionBegin;
3693   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3694   PetscFunctionReturn(0);
3695 }
3696 
3697 #undef __FUNCT__
3698 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3699 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3700 {
3701   Mat_MPIBAIJ *baij;
3702 
3703   PetscFunctionBegin;
3704   baij          = (Mat_MPIBAIJ*)mat->data;
3705   baij->ht_fact = fact;
3706   PetscFunctionReturn(0);
3707 }
3708 
3709 #undef __FUNCT__
3710 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3711 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3712 {
3713   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3714 
3715   PetscFunctionBegin;
3716   if (Ad)     *Ad     = a->A;
3717   if (Ao)     *Ao     = a->B;
3718   if (colmap) *colmap = a->garray;
3719   PetscFunctionReturn(0);
3720 }
3721 
3722 /*
3723     Special version for direct calls from Fortran (to eliminate two function call overheads
3724 */
3725 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3726 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3727 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3728 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3729 #endif
3730 
3731 #undef __FUNCT__
3732 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3733 /*@C
3734   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3735 
3736   Collective on Mat
3737 
3738   Input Parameters:
3739 + mat - the matrix
3740 . min - number of input rows
3741 . im - input rows
3742 . nin - number of input columns
3743 . in - input columns
3744 . v - numerical values input
3745 - addvin - INSERT_VALUES or ADD_VALUES
3746 
3747   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3748 
3749   Level: advanced
3750 
3751 .seealso:   MatSetValuesBlocked()
3752 @*/
3753 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3754 {
3755   /* convert input arguments to C version */
3756   Mat        mat  = *matin;
3757   PetscInt   m    = *min, n = *nin;
3758   InsertMode addv = *addvin;
3759 
3760   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3761   const MatScalar *value;
3762   MatScalar       *barray     = baij->barray;
3763   PetscBool       roworiented = baij->roworiented;
3764   PetscErrorCode  ierr;
3765   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3766   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3767   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3768 
3769   PetscFunctionBegin;
3770   /* tasks normally handled by MatSetValuesBlocked() */
3771   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3772 #if defined(PETSC_USE_DEBUG)
3773   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3774   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3775 #endif
3776   if (mat->assembled) {
3777     mat->was_assembled = PETSC_TRUE;
3778     mat->assembled     = PETSC_FALSE;
3779   }
3780   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3781 
3782 
3783   if (!barray) {
3784     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3785     baij->barray = barray;
3786   }
3787 
3788   if (roworiented) stepval = (n-1)*bs;
3789   else stepval = (m-1)*bs;
3790 
3791   for (i=0; i<m; i++) {
3792     if (im[i] < 0) continue;
3793 #if defined(PETSC_USE_DEBUG)
3794     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3795 #endif
3796     if (im[i] >= rstart && im[i] < rend) {
3797       row = im[i] - rstart;
3798       for (j=0; j<n; j++) {
3799         /* If NumCol = 1 then a copy is not required */
3800         if ((roworiented) && (n == 1)) {
3801           barray = (MatScalar*)v + i*bs2;
3802         } else if ((!roworiented) && (m == 1)) {
3803           barray = (MatScalar*)v + j*bs2;
3804         } else { /* Here a copy is required */
3805           if (roworiented) {
3806             value = v + i*(stepval+bs)*bs + j*bs;
3807           } else {
3808             value = v + j*(stepval+bs)*bs + i*bs;
3809           }
3810           for (ii=0; ii<bs; ii++,value+=stepval) {
3811             for (jj=0; jj<bs; jj++) {
3812               *barray++ = *value++;
3813             }
3814           }
3815           barray -=bs2;
3816         }
3817 
3818         if (in[j] >= cstart && in[j] < cend) {
3819           col  = in[j] - cstart;
3820           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3821         } else if (in[j] < 0) continue;
3822 #if defined(PETSC_USE_DEBUG)
3823         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3824 #endif
3825         else {
3826           if (mat->was_assembled) {
3827             if (!baij->colmap) {
3828               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3829             }
3830 
3831 #if defined(PETSC_USE_DEBUG)
3832 #if defined(PETSC_USE_CTABLE)
3833             { PetscInt data;
3834               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3835               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3836             }
3837 #else
3838             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3839 #endif
3840 #endif
3841 #if defined(PETSC_USE_CTABLE)
3842             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3843             col  = (col - 1)/bs;
3844 #else
3845             col = (baij->colmap[in[j]] - 1)/bs;
3846 #endif
3847             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3848               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3849               col  =  in[j];
3850             }
3851           } else col = in[j];
3852           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3853         }
3854       }
3855     } else {
3856       if (!baij->donotstash) {
3857         if (roworiented) {
3858           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3859         } else {
3860           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3861         }
3862       }
3863     }
3864   }
3865 
3866   /* task normally handled by MatSetValuesBlocked() */
3867   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3868   PetscFunctionReturn(0);
3869 }
3870 
3871 #undef __FUNCT__
3872 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3873 /*@
3874      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3875          CSR format the local rows.
3876 
3877    Collective on MPI_Comm
3878 
3879    Input Parameters:
3880 +  comm - MPI communicator
3881 .  bs - the block size, only a block size of 1 is supported
3882 .  m - number of local rows (Cannot be PETSC_DECIDE)
3883 .  n - This value should be the same as the local size used in creating the
3884        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3885        calculated if N is given) For square matrices n is almost always m.
3886 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3887 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3888 .   i - row indices
3889 .   j - column indices
3890 -   a - matrix values
3891 
3892    Output Parameter:
3893 .   mat - the matrix
3894 
3895    Level: intermediate
3896 
3897    Notes:
3898        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3899      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3900      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3901 
3902      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3903      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3904      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3905      with column-major ordering within blocks.
3906 
3907        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3908 
3909 .keywords: matrix, aij, compressed row, sparse, parallel
3910 
3911 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3912           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3913 @*/
3914 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3915 {
3916   PetscErrorCode ierr;
3917 
3918   PetscFunctionBegin;
3919   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3920   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3921   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3922   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3923   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3924   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3925   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3926   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3927   PetscFunctionReturn(0);
3928 }
3929