xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f6c5ca7819faa18d8a3a15fb5c97bf2c315aa240)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
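/*
   MatGetRowMaxAbs_MPIBAIJ - for each locally owned row, takes the larger of the row maximum of the
   diagonal block (a->A) and of the off-diagonal block (a->B); indices from the diagonal part are
   shifted by cmap->rstart, while off-diagonal indices are mapped back to global columns via garray.
*/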
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non-scalable way since the
83    length of colmap equals the global number of block columns.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
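  /* colmap entries are stored shifted by +1 so that a zero entry (or a failed table lookup)
     means the global block column does not appear in the off-diagonal part B */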
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
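/*
   The following two macros insert a single scalar value at point location (row,col) of the local
   diagonal block (A) or off-diagonal block (B). They locate the block column with a short binary
   search followed by a linear scan of the block row and add or overwrite the entry if the block is
   already present; if it is not present and new nonzeros are allowed, the row is reallocated and
   shifted to make room for a new bs x bs block, otherwise the insertion is ignored or raises an
   error depending on nonew.
*/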
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private macros */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
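  /* Entries in locally owned rows go directly into the diagonal block (baij->A) or the
     off-diagonal block (baij->B); entries destined for rows owned by other processes are
     stashed here and communicated during MatAssemblyBegin/End() */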
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined(PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col  =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B    = baij->B;
249               b    = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba   =b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray     = baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
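  /* stepval is the gap (in scalars) between the end of one row of a bs x bs block and the start of
     the next row of that same block within the logically (m*bs) x (n*bs) input array v (rows and
     columns exchange roles when the values are column-oriented) */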
295   if (roworiented) stepval = (n-1)*bs;
296   else stepval = (m-1)*bs;
297 
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* if a single block is passed (n == 1 row-oriented, or m == 1 column-oriented) it is already contiguous in v and no copy is required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315             value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend) {
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         } else if (in[j] < 0) continue;
328 #if defined(PETSC_USE_DEBUG)
329         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
330 #endif
331         else {
332           if (mat->was_assembled) {
333             if (!baij->colmap) {
334               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
335             }
336 
337 #if defined(PETSC_USE_DEBUG)
338 #if defined(PETSC_USE_CTABLE)
339             { PetscInt data;
340               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
341               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
342             }
343 #else
344             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345 #endif
346 #endif
347 #if defined(PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
349             col  = (col - 1)/bs;
350 #else
351             col = (baij->colmap[in[j]] - 1)/bs;
352 #endif
353             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
354               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355               col  =  in[j];
356             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
357           } else col = in[j];
358           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
359         }
360       }
361     } else {
362       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
363       if (!baij->donotstash) {
364         if (roworiented) {
365           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
366         } else {
367           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
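/*
   HASH_KEY is the fractional part of the golden ratio; HASH() is Knuth's multiplicative hash:
   multiply the key by HASH_KEY, keep the fractional part, and scale it by the table size to get a
   slot index. Collisions are resolved by linear probing, both when the table is built in
   MatCreateHashTable_MPIBAIJ_Private() and when it is searched below.
*/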
375 #define HASH_KEY 0.6180339887
376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
382 {
383   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
384   PetscBool      roworiented = baij->roworiented;
385   PetscErrorCode ierr;
386   PetscInt       i,j,row,col;
387   PetscInt       rstart_orig=mat->rmap->rstart;
388   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
389   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
390   PetscReal      tmp;
391   MatScalar      **HD = baij->hd,value;
392 #if defined(PETSC_USE_DEBUG)
393   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
394 #endif
395 
396   PetscFunctionBegin;
397   for (i=0; i<m; i++) {
398 #if defined(PETSC_USE_DEBUG)
399     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
400     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
401 #endif
402     row = im[i];
403     if (row >= rstart_orig && row < rend_orig) {
404       for (j=0; j<n; j++) {
405         col = in[j];
406         if (roworiented) value = v[i*n+j];
407         else             value = v[i+j*m];
408         /* Look up the block in the hash table */
409         key = (row/bs)*Nbs+(col/bs)+1;
410         h1  = HASH(size,key,tmp);
411 
412 
413         idx = h1;
414 #if defined(PETSC_USE_DEBUG)
415         insert_ct++;
416         total_ct++;
417         if (HT[idx] != key) {
418           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
419           if (idx == size) {
420             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
421             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
427           if (idx == size) {
428             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
429             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
430           }
431         }
432 #endif
433         /* A HASH table entry is found, so insert the values at the correct address */
434         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
435         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
436       }
437     } else if (!baij->donotstash) {
438       if (roworiented) {
439         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
440       } else {
441         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
442       }
443     }
444   }
445 #if defined(PETSC_USE_DEBUG)
446   baij->ht_total_ct  = total_ct;
447   baij->ht_insert_ct = insert_ct;
448 #endif
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
454 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
457   PetscBool         roworiented = baij->roworiented;
458   PetscErrorCode    ierr;
459   PetscInt          i,j,ii,jj,row,col;
460   PetscInt          rstart=baij->rstartbs;
461   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
462   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
463   PetscReal         tmp;
464   MatScalar         **HD = baij->hd,*baij_a;
465   const PetscScalar *v_t,*value;
466 #if defined(PETSC_USE_DEBUG)
467   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
468 #endif
469 
470   PetscFunctionBegin;
471   if (roworiented) stepval = (n-1)*bs;
472   else stepval = (m-1)*bs;
473 
474   for (i=0; i<m; i++) {
475 #if defined(PETSC_USE_DEBUG)
476     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
477     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
478 #endif
479     row = im[i];
480     v_t = v + i*nbs2;
481     if (row >= rstart && row < rend) {
482       for (j=0; j<n; j++) {
483         col = in[j];
484 
485         /* Look up into the Hash Table */
486         key = row*Nbs+col+1;
487         h1  = HASH(size,key,tmp);
488 
489         idx = h1;
490 #if defined(PETSC_USE_DEBUG)
491         total_ct++;
492         insert_ct++;
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #else
501         if (HT[idx] != key) {
502           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
503           if (idx == size) {
504             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
505             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
506           }
507         }
508 #endif
509         baij_a = HD[idx];
510         if (roworiented) {
511           /*value = v + i*(stepval+bs)*bs + j*bs;*/
512           /* value = v + (i*(stepval+bs)+j)*bs; */
513           value = v_t;
514           v_t  += bs;
515           if (addv == ADD_VALUES) {
516             for (ii=0; ii<bs; ii++,value+=stepval) {
517               for (jj=ii; jj<bs2; jj+=bs) {
518                 baij_a[jj] += *value++;
519               }
520             }
521           } else {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] = *value++;
525               }
526             }
527           }
528         } else {
529           value = v + j*(stepval+bs)*bs + i*bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
532               for (jj=0; jj<bs; jj++) {
533                 baij_a[jj] += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] = *value++;
540               }
541             }
542           }
543         }
544       }
545     } else {
546       if (!baij->donotstash) {
547         if (roworiented) {
548           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555 #if defined(PETSC_USE_DEBUG)
556   baij->ht_total_ct  = total_ct;
557   baij->ht_insert_ct = insert_ct;
558 #endif
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatGetValues_MPIBAIJ"
564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
565 {
566   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
567   PetscErrorCode ierr;
568   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
569   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
574     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
575     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
576       row = idxm[i] - bsrstart;
577       for (j=0; j<n; j++) {
578         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
579         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
580         if (idxn[j] >= bscstart && idxn[j] < bscend) {
581           col  = idxn[j] - bscstart;
582           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
583         } else {
584           if (!baij->colmap) {
585             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
586           }
587 #if defined(PETSC_USE_CTABLE)
588           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
589           data--;
590 #else
591           data = baij->colmap[idxn[j]/bs]-1;
592 #endif
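          /* if the block column is not present in the off-diagonal part, the requested
             value is structurally zero */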
593           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
594           else {
595             col  = data + idxn[j]%bs;
596             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
597           }
598         }
599       }
600     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatNorm_MPIBAIJ"
607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
608 {
609   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
610   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
611   PetscErrorCode ierr;
612   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
613   PetscReal      sum = 0.0;
614   MatScalar      *v;
615 
616   PetscFunctionBegin;
617   if (baij->size == 1) {
618     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
619   } else {
620     if (type == NORM_FROBENIUS) {
621       v  = amat->a;
622       nz = amat->nz*bs2;
623       for (i=0; i<nz; i++) {
624         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
625       }
626       v  = bmat->a;
627       nz = bmat->nz*bs2;
628       for (i=0; i<nz; i++) {
629         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
630       }
631       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632       *nrm = PetscSqrtReal(*nrm);
633     } else if (type == NORM_1) { /* max column sum */
634       PetscReal *tmp,*tmp2;
635       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
636       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
637       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
638       v    = amat->a; jj = amat->j;
639       for (i=0; i<amat->nz; i++) {
640         for (j=0; j<bs; j++) {
641           col = bs*(cstart + *jj) + j; /* column index */
642           for (row=0; row<bs; row++) {
643             tmp[col] += PetscAbsScalar(*v);  v++;
644           }
645         }
646         jj++;
647       }
648       v = bmat->a; jj = bmat->j;
649       for (i=0; i<bmat->nz; i++) {
650         for (j=0; j<bs; j++) {
651           col = bs*garray[*jj] + j;
652           for (row=0; row<bs; row++) {
653             tmp[col] += PetscAbsScalar(*v); v++;
654           }
655         }
656         jj++;
657       }
658       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
659       *nrm = 0.0;
660       for (j=0; j<mat->cmap->N; j++) {
661         if (tmp2[j] > *nrm) *nrm = tmp2[j];
662       }
663       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
664     } else if (type == NORM_INFINITY) { /* max row sum */
665       PetscReal *sums;
666       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
667       sum  = 0.0;
668       for (j=0; j<amat->mbs; j++) {
669         for (row=0; row<bs; row++) sums[row] = 0.0;
670         v  = amat->a + bs2*amat->i[j];
671         nz = amat->i[j+1]-amat->i[j];
672         for (i=0; i<nz; i++) {
673           for (col=0; col<bs; col++) {
674             for (row=0; row<bs; row++) {
675               sums[row] += PetscAbsScalar(*v); v++;
676             }
677           }
678         }
679         v  = bmat->a + bs2*bmat->i[j];
680         nz = bmat->i[j+1]-bmat->i[j];
681         for (i=0; i<nz; i++) {
682           for (col=0; col<bs; col++) {
683             for (row=0; row<bs; row++) {
684               sums[row] += PetscAbsScalar(*v); v++;
685             }
686           }
687         }
688         for (row=0; row<bs; row++) {
689           if (sums[row] > sum) sum = sums[row];
690         }
691       }
692       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
693       ierr = PetscFree(sums);CHKERRQ(ierr);
694     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
695   }
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   Creates and populates the hash table.
701   This table is created only once.
702   If new entries need to be added to the matrix
703   then the hash table has to be destroyed and
704   recreated.
705 */
706 #undef __FUNCT__
707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
709 {
710   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
711   Mat            A     = baij->A,B=baij->B;
712   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
713   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
714   PetscErrorCode ierr;
715   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
716   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
717   PetscInt       *HT,key;
718   MatScalar      **HD;
719   PetscReal      tmp;
720 #if defined(PETSC_USE_INFO)
721   PetscInt ct=0,max=0;
722 #endif
723 
724   PetscFunctionBegin;
725   if (baij->ht) PetscFunctionReturn(0);
726 
727   baij->ht_size = (PetscInt)(factor*nz);
728   ht_size       = baij->ht_size;
729 
730   /* Allocate Memory for Hash Table */
731   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
732   HD   = baij->hd;
733   HT   = baij->ht;
734 
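  /* Insert a key for every nonzero block of A and B, resolving collisions by linear probing;
     HD[] stores the address of each block's values so that MatSetValues_MPIBAIJ_HT() can write
     straight into place */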
735   /* Loop Over A */
736   for (i=0; i<a->mbs; i++) {
737     for (j=ai[i]; j<ai[i+1]; j++) {
738       row = i+rstart;
739       col = aj[j]+cstart;
740 
741       key = row*Nbs + col + 1;
742       h1  = HASH(ht_size,key,tmp);
743       for (k=0; k<ht_size; k++) {
744         if (!HT[(h1+k)%ht_size]) {
745           HT[(h1+k)%ht_size] = key;
746           HD[(h1+k)%ht_size] = a->a + j*bs2;
747           break;
748 #if defined(PETSC_USE_INFO)
749         } else {
750           ct++;
751 #endif
752         }
753       }
754 #if defined(PETSC_USE_INFO)
755       if (k> max) max = k;
756 #endif
757     }
758   }
759   /* Loop Over B */
760   for (i=0; i<b->mbs; i++) {
761     for (j=bi[i]; j<bi[i+1]; j++) {
762       row = i+rstart;
763       col = garray[bj[j]];
764       key = row*Nbs + col + 1;
765       h1  = HASH(ht_size,key,tmp);
766       for (k=0; k<ht_size; k++) {
767         if (!HT[(h1+k)%ht_size]) {
768           HT[(h1+k)%ht_size] = key;
769           HD[(h1+k)%ht_size] = b->a + j*bs2;
770           break;
771 #if defined(PETSC_USE_INFO)
772         } else {
773           ct++;
774 #endif
775         }
776       }
777 #if defined(PETSC_USE_INFO)
778       if (k> max) max = k;
779 #endif
780     }
781   }
782 
783   /* Print Summary */
784 #if defined(PETSC_USE_INFO)
785   for (i=0,j=0; i<ht_size; i++) {
786     if (HT[i]) j++;
787   }
788   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
789 #endif
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
795 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800   InsertMode     addv;
801 
802   PetscFunctionBegin;
803   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
804 
805   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
806   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
808   mat->insertmode = addv; /* in case this processor had no cache */
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
812   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
813   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
821 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
822 {
823   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
824   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
825   PetscErrorCode ierr;
826   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
827   PetscInt       *row,*col;
828   PetscBool      r1,r2,r3,other_disassembled;
829   MatScalar      *val;
830   InsertMode     addv = mat->insertmode;
831   PetscMPIInt    n;
832 
833   PetscFunctionBegin;
834   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
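  /* first drain the point stash (values set with MatSetValues() for off-process rows), then the
     block stash filled by MatSetValuesBlocked() */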
835   if (!baij->donotstash && !mat->nooffprocentries) {
836     while (1) {
837       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
838       if (!flg) break;
839 
840       for (i=0; i<n;) {
841         /* Now identify the consecutive vals belonging to the same row */
842         for (j=i,rstart=row[j]; j<n; j++) {
843           if (row[j] != rstart) break;
844         }
845         if (j < n) ncols = j-i;
846         else       ncols = n-i;
847         /* Now assemble all these values with a single function call */
848         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
849         i    = j;
850       }
851     }
852     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
853     /* Now process the block-stash. Since the values are stashed column-oriented,
854        temporarily set the roworiented flags to PETSC_FALSE, and after MatSetValues()
855        restore the original flags */
856     r1 = baij->roworiented;
857     r2 = a->roworiented;
858     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
859 
860     baij->roworiented = PETSC_FALSE;
861     a->roworiented    = PETSC_FALSE;
862 
863     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
864     while (1) {
865       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
866       if (!flg) break;
867 
868       for (i=0; i<n;) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for (j=i,rstart=row[j]; j<n; j++) {
871           if (row[j] != rstart) break;
872         }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
876         i    = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
880 
881     baij->roworiented = r1;
882     a->roworiented    = r2;
883 
884     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
885   }
886 
887   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
888   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
889 
890   /* determine if any processor has disassembled; if so we must
891      also disassemble ourselves, so that we may reassemble. */
892   /*
893      if the nonzero structure of submatrix B cannot change then we know that
894      no processor disassembled, thus we can skip this step
895   */
896   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
897     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
898     if (mat->was_assembled && !other_disassembled) {
899       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
900     }
901   }
902 
903   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
904     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
905   }
906   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
907   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
908 
909 #if defined(PETSC_USE_INFO)
910   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
911     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
912 
913     baij->ht_total_ct  = 0;
914     baij->ht_insert_ct = 0;
915   }
916 #endif
917   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
918     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
919 
920     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
921     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
922   }
923 
924   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
925 
926   baij->rowvalues = 0;
927 
928   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
929   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
930     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
931     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
937 #include <petscdraw.h>
938 #undef __FUNCT__
939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
941 {
942   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
943   PetscErrorCode    ierr;
944   PetscMPIInt       rank = baij->rank;
945   PetscInt          bs   = mat->rmap->bs;
946   PetscBool         iascii,isdraw;
947   PetscViewer       sviewer;
948   PetscViewerFormat format;
949 
950   PetscFunctionBegin;
951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
953   if (iascii) {
954     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
955     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
956       MatInfo info;
957       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
958       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
961                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
964       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
966       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
967       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
968       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
969       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     } else if (format == PETSC_VIEWER_ASCII_INFO) {
972       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
973       PetscFunctionReturn(0);
974     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
975       PetscFunctionReturn(0);
976     }
977   }
978 
979   if (isdraw) {
980     PetscDraw draw;
981     PetscBool isnull;
982     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
983     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
984   }
985 
986   {
987     /* assemble the entire matrix onto first processor. */
988     Mat         A;
989     Mat_SeqBAIJ *Aloc;
990     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
991     MatScalar   *a;
992     const char  *matname;
993 
994     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
995     /* Perhaps this should be the type of mat? */
996     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
997     if (!rank) {
998       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
999     } else {
1000       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1001     }
1002     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1003     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1004     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1005     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1006 
1007     /* copy over the A part */
1008     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1009     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1010     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1011 
1012     for (i=0; i<mbs; i++) {
1013       rvals[0] = bs*(baij->rstartbs + i);
1014       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1015       for (j=ai[i]; j<ai[i+1]; j++) {
1016         col = (baij->cstartbs+aj[j])*bs;
1017         for (k=0; k<bs; k++) {
1018           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1019           col++; a += bs;
1020         }
1021       }
1022     }
1023     /* copy over the B part */
1024     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1025     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1026     for (i=0; i<mbs; i++) {
1027       rvals[0] = bs*(baij->rstartbs + i);
1028       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1029       for (j=ai[i]; j<ai[i+1]; j++) {
1030         col = baij->garray[aj[j]]*bs;
1031         for (k=0; k<bs; k++) {
1032           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1033           col++; a += bs;
1034         }
1035       }
1036     }
1037     ierr = PetscFree(rvals);CHKERRQ(ierr);
1038     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040     /*
1041        Everyone has to call to draw the matrix since the graphics waits are
1042        synchronized across all processors that share the PetscDraw object
1043     */
1044     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1045     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1046     if (!rank) {
1047       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1048       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1049     }
1050     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1051     ierr = MatDestroy(&A);CHKERRQ(ierr);
1052   }
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1058 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1059 {
1060   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1061   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1062   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1063   PetscErrorCode ierr;
1064   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1065   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1066   int            fd;
1067   PetscScalar    *column_values;
1068   FILE           *file;
1069   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1070   PetscInt       message_count,flowcontrolcount;
1071 
1072   PetscFunctionBegin;
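  /* Binary format: header (classid, M, N, total nz), then all row lengths, then all column
     indices, then all numerical values; rank 0 collects each piece from the other ranks under
     flow control and writes it to the file */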
1073   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1074   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1075   nz   = bs2*(A->nz + B->nz);
1076   rlen = mat->rmap->n;
1077   if (!rank) {
1078     header[0] = MAT_FILE_CLASSID;
1079     header[1] = mat->rmap->N;
1080     header[2] = mat->cmap->N;
1081 
1082     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1083     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1084     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1085     /* get largest number of rows any processor has */
1086     range = mat->rmap->range;
1087     for (i=1; i<size; i++) {
1088       rlen = PetscMax(rlen,range[i+1] - range[i]);
1089     }
1090   } else {
1091     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1092   }
1093 
1094   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1095   /* compute the number of nonzero blocks in each block row */
1096   for (i=0; i<a->mbs; i++) {
1097     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1098   }
1099   /* store the row lengths to the file */
1100   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1101   if (!rank) {
1102     MPI_Status status;
1103     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1104     rlen = (range[1] - range[0])/bs;
1105     for (i=0; i<rlen; i++) {
1106       for (j=0; j<bs; j++) {
1107         row_lens[i*bs+j] = bs*crow_lens[i];
1108       }
1109     }
1110     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1111     for (i=1; i<size; i++) {
1112       rlen = (range[i+1] - range[i])/bs;
1113       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1114       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1115       for (k=0; k<rlen; k++) {
1116         for (j=0; j<bs; j++) {
1117           row_lens[k*bs+j] = bs*crow_lens[k];
1118         }
1119       }
1120       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1121     }
1122     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1123     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1124   } else {
1125     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1126     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1127     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1128   }
1129   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1130 
1131   /* load up the local column indices. Include them for all rows, not just one set per block row, since process 0 does not have the
1132      information needed to expand a block row into its individual rows. This does require more communication, but still not more than
1133      the communication needed for the nonzero values  */
1134   nzmax = nz; /* space the largest processor needs */
1135   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1136   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1137   cnt   = 0;
1138   for (i=0; i<a->mbs; i++) {
1139     pcnt = cnt;
1140     for (j=B->i[i]; j<B->i[i+1]; j++) {
1141       if ((col = garray[B->j[j]]) > cstart) break;
1142       for (l=0; l<bs; l++) {
1143         column_indices[cnt++] = bs*col+l;
1144       }
1145     }
1146     for (k=A->i[i]; k<A->i[i+1]; k++) {
1147       for (l=0; l<bs; l++) {
1148         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1149       }
1150     }
1151     for (; j<B->i[i+1]; j++) {
1152       for (l=0; l<bs; l++) {
1153         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1154       }
1155     }
1156     len = cnt - pcnt;
1157     for (k=1; k<bs; k++) {
1158       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1159       cnt += len;
1160     }
1161   }
1162   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1163 
1164   /* store the columns to the file */
1165   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1166   if (!rank) {
1167     MPI_Status status;
1168     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     for (i=1; i<size; i++) {
1170       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1171       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1172       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1173       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     }
1175     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1176   } else {
1177     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1178     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1179     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1180     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1181   }
1182   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1183 
1184   /* load up the numerical values */
1185   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1186   cnt  = 0;
1187   for (i=0; i<a->mbs; i++) {
1188     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1189     for (j=B->i[i]; j<B->i[i+1]; j++) {
1190       if (garray[B->j[j]] > cstart) break;
1191       for (l=0; l<bs; l++) {
1192         for (ll=0; ll<bs; ll++) {
1193           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1194         }
1195       }
1196       cnt += bs;
1197     }
1198     for (k=A->i[i]; k<A->i[i+1]; k++) {
1199       for (l=0; l<bs; l++) {
1200         for (ll=0; ll<bs; ll++) {
1201           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1202         }
1203       }
1204       cnt += bs;
1205     }
1206     for (; j<B->i[i+1]; j++) {
1207       for (l=0; l<bs; l++) {
1208         for (ll=0; ll<bs; ll++) {
1209           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1210         }
1211       }
1212       cnt += bs;
1213     }
1214     cnt += (bs-1)*rlen;
1215   }
1216   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1217 
1218   /* store the column values to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     MPI_Status status;
1222     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1225       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1226       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1227       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1228     }
1229     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1230   } else {
1231     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1232     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1235   }
1236   ierr = PetscFree(column_values);CHKERRQ(ierr);
1237 
1238   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1239   if (file) {
1240     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1241   }
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatView_MPIBAIJ"
1247 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1248 {
1249   PetscErrorCode ierr;
1250   PetscBool      iascii,isdraw,issocket,isbinary;
1251 
1252   PetscFunctionBegin;
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1255   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1256   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1257   if (iascii || isdraw || issocket) {
1258     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1259   } else if (isbinary) {
1260     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1267 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1268 {
1269   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1270   PetscErrorCode ierr;
1271 
1272   PetscFunctionBegin;
1273 #if defined(PETSC_USE_LOG)
1274   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1275 #endif
1276   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1277   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1278   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1279   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1280 #if defined(PETSC_USE_CTABLE)
1281   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1282 #else
1283   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1284 #endif
1285   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1286   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1287   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1290   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1291   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1292   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1293 
1294   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatMult_MPIBAIJ"
1309 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1310 {
1311   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313   PetscInt       nt;
1314 
1315   PetscFunctionBegin;
1316   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1317   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1318   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1319   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1320   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1322   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1323   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1329 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1330 {
1331   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1337   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1344 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1345 {
1346   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1347   PetscErrorCode ierr;
1348   PetscBool      merged;
1349 
1350   PetscFunctionBegin;
1351   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1352   /* do nondiagonal part */
1353   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1354   if (!merged) {
1355     /* send it on its way */
1356     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1357     /* do local part */
1358     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1359     /* receive remote parts: note this assumes the values are not actually */
1360     /* inserted in yy until the next line */
1361     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1362   } else {
1363     /* do local part */
1364     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1365     /* send it on its way */
1366     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367     /* values actually were received in the Begin() but we need to call this nop */
1368     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1369   }
1370   PetscFunctionReturn(0);
1371 }
1372 
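/*
   A minimal usage sketch (an illustration only; A, x, y are placeholder names and it is
   assumed A is an assembled MATMPIBAIJ matrix and ierr/CHKERRQ follow this file's
   conventions): applying the transpose of A. MatCreateVecs() is used so the vectors
   conform to the matrix layouts.

     Vec x,y;
     ierr = MatCreateVecs(A,&y,&x);CHKERRQ(ierr);   // x conforms to rows of A, y to columns
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMultTranspose(A,x,y);CHKERRQ(ierr);  // y = A^T x
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/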
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1375 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1376 {
1377   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   /* do nondiagonal part */
1382   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1383   /* send it on its way */
1384   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1385   /* do local part */
1386   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1387   /* receive remote parts: note this assumes the values are not actually */
1388   /* inserted in zz until the VecScatterEnd() below, which is true for this */
1389   /* implementation but may not always be true. */
1390   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 /*
1395   This only works correctly for square matrices where the subblock A->A is the
1396    diagonal block
1397 */
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1400 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1401 {
1402   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1403   PetscErrorCode ierr;
1404 
1405   PetscFunctionBegin;
1406   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1407   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
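/*
   A minimal usage sketch (placeholder names; assumes A is an assembled, square
   MATMPIBAIJ matrix): extract the diagonal into a conforming vector.

     Vec diag;
     ierr = MatCreateVecs(A,NULL,&diag);CHKERRQ(ierr);  // left vector: row layout of A
     ierr = MatGetDiagonal(A,diag);CHKERRQ(ierr);       // errors if A is not square
     ierr = VecDestroy(&diag);CHKERRQ(ierr);
*/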
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatScale_MPIBAIJ"
1413 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1414 {
1415   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1420   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1426 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1427 {
1428   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1429   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1430   PetscErrorCode ierr;
1431   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1432   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1433   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1434 
1435   PetscFunctionBegin;
1436   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1437   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1438   mat->getrowactive = PETSC_TRUE;
1439 
1440   if (!mat->rowvalues && (idx || v)) {
1441     /*
1442         allocate enough space to hold information from the longest row.
1443     */
1444     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1445     PetscInt    max = 1,mbs = mat->mbs,tmp;
1446     for (i=0; i<mbs; i++) {
1447       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1448       if (max < tmp) max = tmp;
1449     }
1450     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1451   }
1452   lrow = row - brstart;
1453 
1454   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1455   if (!v)   {pvA = 0; pvB = 0;}
1456   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1457   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1458   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1459   nztot = nzA + nzB;
1460 
1461   cmap = mat->garray;
1462   if (v  || idx) {
1463     if (nztot) {
1464       /* Sort by increasing column numbers, assuming A and B already sorted */
1465       PetscInt imark = -1;
1466       if (v) {
1467         *v = v_p = mat->rowvalues;
1468         for (i=0; i<nzB; i++) {
1469           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1470           else break;
1471         }
1472         imark = i;
1473         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1474         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1475       }
1476       if (idx) {
1477         *idx = idx_p = mat->rowindices;
1478         if (imark > -1) {
1479           for (i=0; i<imark; i++) {
1480             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1481           }
1482         } else {
1483           for (i=0; i<nzB; i++) {
1484             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1485             else break;
1486           }
1487           imark = i;
1488         }
1489         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1490         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1491       }
1492     } else {
1493       if (idx) *idx = 0;
1494       if (v)   *v   = 0;
1495     }
1496   }
1497   *nz  = nztot;
1498   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1499   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 
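/*
   A minimal usage sketch (placeholder names; assumes A is an assembled MATMPIBAIJ
   matrix): MatGetRow() may only be called for locally owned rows, and each call must
   be paired with MatRestoreRow() before requesting another row.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... inspect ncols, cols[], vals[] ... */
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/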
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1505 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1506 {
1507   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1508 
1509   PetscFunctionBegin;
1510   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1511   baij->getrowactive = PETSC_FALSE;
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1517 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1518 {
1519   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1520   PetscErrorCode ierr;
1521 
1522   PetscFunctionBegin;
1523   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1524   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1530 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1531 {
1532   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1533   Mat            A  = a->A,B = a->B;
1534   PetscErrorCode ierr;
1535   PetscReal      isend[5],irecv[5];
1536 
1537   PetscFunctionBegin;
1538   info->block_size = (PetscReal)matin->rmap->bs;
1539 
1540   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1541 
1542   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1543   isend[3] = info->memory;  isend[4] = info->mallocs;
1544 
1545   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1546 
1547   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1548   isend[3] += info->memory;  isend[4] += info->mallocs;
1549 
1550   if (flag == MAT_LOCAL) {
1551     info->nz_used      = isend[0];
1552     info->nz_allocated = isend[1];
1553     info->nz_unneeded  = isend[2];
1554     info->memory       = isend[3];
1555     info->mallocs      = isend[4];
1556   } else if (flag == MAT_GLOBAL_MAX) {
1557     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1558 
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else if (flag == MAT_GLOBAL_SUM) {
1565     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1566 
1567     info->nz_used      = irecv[0];
1568     info->nz_allocated = irecv[1];
1569     info->nz_unneeded  = irecv[2];
1570     info->memory       = irecv[3];
1571     info->mallocs      = irecv[4];
1572   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1573   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1574   info->fill_ratio_needed = 0;
1575   info->factor_mallocs    = 0;
1576   PetscFunctionReturn(0);
1577 }
1578 
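/*
   A minimal usage sketch (placeholder names; assumes A is an assembled MATMPIBAIJ
   matrix): querying matrix statistics. MAT_LOCAL reports this process only, while
   MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce over the communicator as implemented above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",
                        (double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/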
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1581 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1582 {
1583   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1584   PetscErrorCode ierr;
1585 
1586   PetscFunctionBegin;
1587   switch (op) {
1588   case MAT_NEW_NONZERO_LOCATIONS:
1589   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1590   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1591   case MAT_KEEP_NONZERO_PATTERN:
1592   case MAT_NEW_NONZERO_LOCATION_ERR:
1593     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1594     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1595     break;
1596   case MAT_ROW_ORIENTED:
1597     a->roworiented = flg;
1598 
1599     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1600     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1601     break;
1602   case MAT_NEW_DIAGONALS:
1603     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1604     break;
1605   case MAT_IGNORE_OFF_PROC_ENTRIES:
1606     a->donotstash = flg;
1607     break;
1608   case MAT_USE_HASH_TABLE:
1609     a->ht_flag = flg;
1610     break;
1611   case MAT_SYMMETRIC:
1612   case MAT_STRUCTURALLY_SYMMETRIC:
1613   case MAT_HERMITIAN:
1614   case MAT_SYMMETRY_ETERNAL:
1615     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1616     break;
1617   default:
1618     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1619   }
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1625 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1626 {
1627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1628   Mat_SeqBAIJ    *Aloc;
1629   Mat            B;
1630   PetscErrorCode ierr;
1631   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1632   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1633   MatScalar      *a;
1634 
1635   PetscFunctionBegin;
1636   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1637   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1638     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1639     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1640     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1641     /* Do not know preallocation information, but must set block size */
1642     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1643   } else {
1644     B = *matout;
1645   }
1646 
1647   /* copy over the A part */
1648   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1649   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1650   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1651 
1652   for (i=0; i<mbs; i++) {
1653     rvals[0] = bs*(baij->rstartbs + i);
1654     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1655     for (j=ai[i]; j<ai[i+1]; j++) {
1656       col = (baij->cstartbs+aj[j])*bs;
1657       for (k=0; k<bs; k++) {
1658         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1659 
1660         col++; a += bs;
1661       }
1662     }
1663   }
1664   /* copy over the B part */
1665   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1666   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1667   for (i=0; i<mbs; i++) {
1668     rvals[0] = bs*(baij->rstartbs + i);
1669     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1670     for (j=ai[i]; j<ai[i+1]; j++) {
1671       col = baij->garray[aj[j]]*bs;
1672       for (k=0; k<bs; k++) {
1673         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1674         col++;
1675         a += bs;
1676       }
1677     }
1678   }
1679   ierr = PetscFree(rvals);CHKERRQ(ierr);
1680   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682 
1683   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1684   else {
1685     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1692 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1693 {
1694   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1695   Mat            a     = baij->A,b = baij->B;
1696   PetscErrorCode ierr;
1697   PetscInt       s1,s2,s3;
1698 
1699   PetscFunctionBegin;
1700   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1701   if (rr) {
1702     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1703     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1704     /* Overlap communication with computation. */
1705     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1706   }
1707   if (ll) {
1708     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1709     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1710     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1711   }
1712   /* scale  the diagonal block */
1713   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1714 
1715   if (rr) {
1716     /* Do a scatter end and then right scale the off-diagonal block */
1717     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1718     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1719   }
1720   PetscFunctionReturn(0);
1721 }
1722 
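/*
   A minimal usage sketch (placeholder names; assumes A is an assembled MATMPIBAIJ
   matrix): scale A by diagonal matrices built from vectors, A <- diag(l) A diag(r).
   The left vector must conform to the rows of A and the right vector to its columns;
   either may be NULL to skip that side.

     Vec l,r;
     ierr = MatCreateVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/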
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1725 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1726 {
1727   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1728   PetscInt      *owners = A->rmap->range;
1729   PetscInt       n      = A->rmap->n;
1730   PetscSF        sf;
1731   PetscInt      *lrows;
1732   PetscSFNode   *rrows;
1733   PetscInt       r, p = 0, len = 0;
1734   PetscErrorCode ierr;
1735 
1736   PetscFunctionBegin;
1737   /* Create SF where leaves are input rows and roots are owned rows */
1738   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1739   for (r = 0; r < n; ++r) lrows[r] = -1;
1740   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
1741   for (r = 0; r < N; ++r) {
1742     const PetscInt idx   = rows[r];
1743     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1744     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1745       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1746     }
1747     if (A->nooffproczerorows) {
1748       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1749       lrows[len++] = idx - owners[p];
1750     } else {
1751       rrows[r].rank = p;
1752       rrows[r].index = rows[r] - owners[p];
1753     }
1754   }
1755   if (!A->nooffproczerorows) {
1756     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1757     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1758     /* Collect flags for rows to be zeroed */
1759     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1760     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1761     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1762     /* Compress and put in row numbers */
1763     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1764   }
1765   /* fix right hand side if needed */
1766   if (x && b) {
1767     const PetscScalar *xx;
1768     PetscScalar       *bb;
1769 
1770     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1771     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1772     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1773     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1774     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1775   }
1776 
1777   /* actually zap the local rows */
1778   /*
1779         Zero the required rows. If the "diagonal block" of the matrix
1780      is square and the user wishes to set the diagonal, we use separate
1781      code so that MatSetValues() is not called for each diagonal entry;
1782      that would allocate new memory each time, causing many mallocs and
1783      slowing things down.
1784   */
1785   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1786   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1787   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1788     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1789   } else if (diag != 0.0) {
1790     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1791     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1792        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1793     for (r = 0; r < len; ++r) {
1794       const PetscInt row = lrows[r] + A->rmap->rstart;
1795       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1796     }
1797     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1798     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1799   } else {
1800     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1801   }
1802   ierr = PetscFree(lrows);CHKERRQ(ierr);
1803 
1804   /* only change matrix nonzero state if pattern was allowed to be changed */
1805   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1806     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1807     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1808   }
1809   PetscFunctionReturn(0);
1810 }
1811 
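/*
   A minimal usage sketch (placeholder names; assumes A is an assembled MATMPIBAIJ
   matrix, x holds the intended solution values for the zeroed rows, and b is the
   right-hand side): zero two global rows, put 1.0 on their diagonal entries, and fix
   b accordingly. The listed rows may be owned by other processes unless
   MAT_NO_OFF_PROC_ZERO_ROWS has been set; the row numbers below are arbitrary.

     PetscInt rows[2];
     rows[0] = 0; rows[1] = 5;                       // global row numbers
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/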
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1814 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1815 {
1816   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1817   PetscErrorCode    ierr;
1818   PetscMPIInt       n = A->rmap->n;
1819   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1820   PetscInt          *lrows,*owners = A->rmap->range;
1821   PetscSFNode       *rrows;
1822   PetscSF           sf;
1823   const PetscScalar *xx;
1824   PetscScalar       *bb,*mask;
1825   Vec               xmask,lmask;
1826   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1827   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1828   PetscScalar       *aa;
1829 
1830   PetscFunctionBegin;
1831   /* Create SF where leaves are input rows and roots are owned rows */
1832   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1833   for (r = 0; r < n; ++r) lrows[r] = -1;
1834   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1835   for (r = 0; r < N; ++r) {
1836     const PetscInt idx   = rows[r];
1837     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1838     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1839       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1840     }
1841     rrows[r].rank  = p;
1842     rrows[r].index = rows[r] - owners[p];
1843   }
1844   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1845   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1846   /* Collect flags for rows to be zeroed */
1847   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1848   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1849   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1850   /* Compress and put in row numbers */
1851   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1852   /* zero diagonal part of matrix */
1853   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1854   /* handle off diagonal part of matrix */
1855   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1856   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1857   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1858   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1859   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1860   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1861   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1862   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1863   if (x) {
1864     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1865     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1866     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1867     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1868   }
1869   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1870   /* remove zeroed rows of off diagonal matrix */
1871   for (i = 0; i < len; ++i) {
1872     row   = lrows[i];
1873     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1874     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1875     for (k = 0; k < count; ++k) {
1876       aa[0] = 0.0;
1877       aa   += bs;
1878     }
1879   }
1880   /* loop over all elements of off process part of matrix zeroing removed columns*/
1881   for (i = 0; i < l->B->rmap->N; ++i) {
1882     row = i/bs;
1883     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1884       for (k = 0; k < bs; ++k) {
1885         col = bs*baij->j[j] + k;
1886         if (PetscAbsScalar(mask[col])) {
1887           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1888           if (b) bb[i] -= aa[0]*xx[col];
1889           aa[0] = 0.0;
1890         }
1891       }
1892     }
1893   }
1894   if (x) {
1895     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1896     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1897   }
1898   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1899   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1900   ierr = PetscFree(lrows);CHKERRQ(ierr);
1901 
1902   /* only change matrix nonzero state if pattern was allowed to be changed */
1903   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1904     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1905     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1906   }
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1912 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1913 {
1914   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1915   PetscErrorCode ierr;
1916 
1917   PetscFunctionBegin;
1918   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1923 
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatEqual_MPIBAIJ"
1926 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1927 {
1928   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1929   Mat            a,b,c,d;
1930   PetscBool      flg;
1931   PetscErrorCode ierr;
1932 
1933   PetscFunctionBegin;
1934   a = matA->A; b = matA->B;
1935   c = matB->A; d = matB->B;
1936 
1937   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1938   if (flg) {
1939     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1940   }
1941   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatCopy_MPIBAIJ"
1947 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1948 {
1949   PetscErrorCode ierr;
1950   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1951   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1952 
1953   PetscFunctionBegin;
1954   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1955   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1956     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1957   } else {
1958     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1959     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1966 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1967 {
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
1977 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1978 {
1979   PetscErrorCode ierr;
1980   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1981   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1982   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1983 
1984   PetscFunctionBegin;
1985   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1991 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1992 {
1993   PetscErrorCode ierr;
1994   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1995   PetscBLASInt   bnz,one=1;
1996   Mat_SeqBAIJ    *x,*y;
1997 
1998   PetscFunctionBegin;
1999   if (str == SAME_NONZERO_PATTERN) {
2000     PetscScalar alpha = a;
2001     x    = (Mat_SeqBAIJ*)xx->A->data;
2002     y    = (Mat_SeqBAIJ*)yy->A->data;
2003     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2004     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2005     x    = (Mat_SeqBAIJ*)xx->B->data;
2006     y    = (Mat_SeqBAIJ*)yy->B->data;
2007     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2008     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2009     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2010   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2011     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2012   } else {
2013     Mat      B;
2014     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2015     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2016     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2017     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2018     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2019     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2020     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2021     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2022     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2023     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2024     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2025     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2026     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2027     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2028     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2029     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2030   }
2031   PetscFunctionReturn(0);
2032 }
2033 
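/*
   A minimal usage sketch (placeholder names; assumes X and Y are assembled MATMPIBAIJ
   matrices with identical layouts): Y <- a*X + Y. With SAME_NONZERO_PATTERN the fast
   BLAS path above is used; with DIFFERENT_NONZERO_PATTERN a new matrix with merged
   preallocation must be built, which is considerably slower.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/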
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2036 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2037 {
2038   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2039   PetscErrorCode ierr;
2040 
2041   PetscFunctionBegin;
2042   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2043   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2049 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2050 {
2051   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2052   PetscErrorCode ierr;
2053 
2054   PetscFunctionBegin;
2055   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2056   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2062 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2063 {
2064   PetscErrorCode ierr;
2065   IS             iscol_local;
2066   PetscInt       csize;
2067 
2068   PetscFunctionBegin;
2069   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2070   if (call == MAT_REUSE_MATRIX) {
2071     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2072     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2073   } else {
2074     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2075   }
2076   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2077   if (call == MAT_INITIAL_MATRIX) {
2078     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2079     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
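/*
   A minimal usage sketch (placeholder names; assumes A is an assembled MATMPIBAIJ
   matrix): extract a parallel submatrix. Here the index sets simply cover this
   process's owned rows and columns, so the result equals A; for BAIJ both index
   sets should respect block boundaries.

     IS       isrow,iscol;
     PetscInt rstart,rend,cstart,cend;
     Mat      S;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&S);CHKERRQ(ierr);
*/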
2083 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2086 /*
2087   Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2088   and then the end result obtained by concatenating the local matrices.
2089   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2090   This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency).
2091 */
2092 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2093 {
2094   PetscErrorCode ierr;
2095   PetscMPIInt    rank,size;
2096   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2097   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2098   Mat            M,Mreuse;
2099   MatScalar      *vwork,*aa;
2100   MPI_Comm       comm;
2101   IS             isrow_new, iscol_new;
2102   PetscBool      idflag,allrows, allcols;
2103   Mat_SeqBAIJ    *aij;
2104 
2105   PetscFunctionBegin;
2106   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2107   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2108   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2109   /* The compression and expansion should be avoided. It does not catch
2110      errors and might change the indices, hence it is buggy */
2111   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2112   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2113 
2114   /* Check for special case: each processor gets entire matrix columns */
2115   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2116   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2117   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2118   else allcols = PETSC_FALSE;
2119 
2120   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2121   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2122   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2123   else allrows = PETSC_FALSE;
2124 
2125   if (call ==  MAT_REUSE_MATRIX) {
2126     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2127     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2128     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2129   } else {
2130     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2131   }
2132   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2133   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2134   /*
2135       m - number of local rows
2136       n - number of columns (same on all processors)
2137       rstart - first row in new global matrix generated
2138   */
2139   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2140   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2141   m    = m/bs;
2142   n    = n/bs;
2143 
2144   if (call == MAT_INITIAL_MATRIX) {
2145     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2146     ii  = aij->i;
2147     jj  = aij->j;
2148 
2149     /*
2150         Determine the number of non-zeros in the diagonal and off-diagonal
2151         portions of the matrix in order to do correct preallocation
2152     */
2153 
2154     /* first get start and end of "diagonal" columns */
2155     if (csize == PETSC_DECIDE) {
2156       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2157       if (mglobal == n*bs) { /* square matrix */
2158         nlocal = m;
2159       } else {
2160         nlocal = n/size + ((n % size) > rank);
2161       }
2162     } else {
2163       nlocal = csize/bs;
2164     }
2165     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2166     rstart = rend - nlocal;
2167     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2168 
2169     /* next, compute all the lengths */
2170     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2171     for (i=0; i<m; i++) {
2172       jend = ii[i+1] - ii[i];
2173       olen = 0;
2174       dlen = 0;
2175       for (j=0; j<jend; j++) {
2176         if (*jj < rstart || *jj >= rend) olen++;
2177         else dlen++;
2178         jj++;
2179       }
2180       olens[i] = olen;
2181       dlens[i] = dlen;
2182     }
2183     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2184     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2185     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2186     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2187     ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2188     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2189   } else {
2190     PetscInt ml,nl;
2191 
2192     M    = *newmat;
2193     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2194     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2195     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2196     /*
2197          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2198        rather than the slower MatSetValuesBlocked().
2199     */
2200     M->was_assembled = PETSC_TRUE;
2201     M->assembled     = PETSC_FALSE;
2202   }
2203   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2204   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2205   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2206   ii   = aij->i;
2207   jj   = aij->j;
2208   aa   = aij->a;
2209   for (i=0; i<m; i++) {
2210     row   = rstart/bs + i;
2211     nz    = ii[i+1] - ii[i];
2212     cwork = jj;     jj += nz;
2213     vwork = aa;     aa += nz*bs*bs;
2214     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2215   }
2216 
2217   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2218   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2219   *newmat = M;
2220 
2221   /* save submatrix used in processor for next request */
2222   if (call ==  MAT_INITIAL_MATRIX) {
2223     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2224     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2225   }
2226   PetscFunctionReturn(0);
2227 }
2228 
2229 #undef __FUNCT__
2230 #define __FUNCT__ "MatPermute_MPIBAIJ"
2231 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2232 {
2233   MPI_Comm       comm,pcomm;
2234   PetscInt       clocal_size,nrows;
2235   const PetscInt *rows;
2236   PetscMPIInt    size;
2237   IS             crowp,lcolp;
2238   PetscErrorCode ierr;
2239 
2240   PetscFunctionBegin;
2241   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2242   /* make a collective version of 'rowp' */
2243   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2244   if (pcomm==comm) {
2245     crowp = rowp;
2246   } else {
2247     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2248     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2249     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2250     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2251   }
2252   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2253   /* make a local version of 'colp' */
2254   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2255   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2256   if (size==1) {
2257     lcolp = colp;
2258   } else {
2259     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2260   }
2261   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2262   /* now we just get the submatrix */
2263   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2264   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2265   /* clean up */
2266   if (pcomm!=comm) {
2267     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2268   }
2269   if (size>1) {
2270     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2271   }
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 #undef __FUNCT__
2276 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2277 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2278 {
2279   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2280   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2281 
2282   PetscFunctionBegin;
2283   if (nghosts) *nghosts = B->nbs;
2284   if (ghosts) *ghosts = baij->garray;
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2290 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2291 {
2292   Mat            B;
2293   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2294   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2295   Mat_SeqAIJ     *b;
2296   PetscErrorCode ierr;
2297   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2298   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2299   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2300 
2301   PetscFunctionBegin;
2302   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2303   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2304 
2305   /* ----------------------------------------------------------------
2306      Tell every processor the number of nonzeros per row
2307   */
2308   ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
2309   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2310     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2311   }
2312   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2313   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2314   displs    = recvcounts + size;
2315   for (i=0; i<size; i++) {
2316     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2317     displs[i]     = A->rmap->range[i]/bs;
2318   }
2319 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2320   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2321 #else
2322   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2323 #endif
2324   /* ---------------------------------------------------------------
2325      Create the sequential matrix of the same type as the local block diagonal
2326   */
2327   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2328   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2329   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2330   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2331   b    = (Mat_SeqAIJ*)B->data;
2332 
2333   /*--------------------------------------------------------------------
2334     Copy my part of matrix column indices over
2335   */
2336   sendcount  = ad->nz + bd->nz;
2337   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2338   a_jsendbuf = ad->j;
2339   b_jsendbuf = bd->j;
2340   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2341   cnt        = 0;
2342   for (i=0; i<n; i++) {
2343 
2344     /* put in lower diagonal portion */
2345     m = bd->i[i+1] - bd->i[i];
2346     while (m > 0) {
2347       /* is it above diagonal (in bd (compressed) numbering) */
2348       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2349       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2350       m--;
2351     }
2352 
2353     /* put in diagonal portion */
2354     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2355       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2356     }
2357 
2358     /* put in upper diagonal portion */
2359     while (m-- > 0) {
2360       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2361     }
2362   }
2363   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2364 
2365   /*--------------------------------------------------------------------
2366     Gather all column indices to all processors
2367   */
2368   for (i=0; i<size; i++) {
2369     recvcounts[i] = 0;
2370     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2371       recvcounts[i] += lens[j];
2372     }
2373   }
2374   displs[0] = 0;
2375   for (i=1; i<size; i++) {
2376     displs[i] = displs[i-1] + recvcounts[i-1];
2377   }
2378 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2379   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2380 #else
2381   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2382 #endif
2383   /*--------------------------------------------------------------------
2384     Assemble the matrix into useable form (note numerical values not yet set)
2385   */
2386   /* set the b->ilen (length of each row) values */
2387   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2388   /* set the b->i indices */
2389   b->i[0] = 0;
2390   for (i=1; i<=A->rmap->N/bs; i++) {
2391     b->i[i] = b->i[i-1] + lens[i-1];
2392   }
2393   ierr = PetscFree(lens);CHKERRQ(ierr);
2394   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2395   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2396   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2397 
2398   if (A->symmetric) {
2399     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2400   } else if (A->hermitian) {
2401     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2402   } else if (A->structurally_symmetric) {
2403     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2404   }
2405   *newmat = B;
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatSOR_MPIBAIJ"
2411 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2412 {
2413   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2414   PetscErrorCode ierr;
2415   Vec            bb1 = 0;
2416 
2417   PetscFunctionBegin;
2418   if (flag == SOR_APPLY_UPPER) {
2419     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2420     PetscFunctionReturn(0);
2421   }
2422 
2423   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2424     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2425   }
2426 
2427   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2428     if (flag & SOR_ZERO_INITIAL_GUESS) {
2429       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2430       its--;
2431     }
2432 
2433     while (its--) {
2434       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2435       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2436 
2437       /* update rhs: bb1 = bb - B*x */
2438       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2439       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2440 
2441       /* local sweep */
2442       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2443     }
2444   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2445     if (flag & SOR_ZERO_INITIAL_GUESS) {
2446       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2447       its--;
2448     }
2449     while (its--) {
2450       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2451       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2452 
2453       /* update rhs: bb1 = bb - B*x */
2454       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2455       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2456 
2457       /* local sweep */
2458       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2459     }
2460   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2461     if (flag & SOR_ZERO_INITIAL_GUESS) {
2462       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2463       its--;
2464     }
2465     while (its--) {
2466       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2467       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2468 
2469       /* update rhs: bb1 = bb - B*x */
2470       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2471       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2472 
2473       /* local sweep */
2474       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2475     }
2476   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2477 
2478   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
2481 
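/*
   A minimal usage sketch (placeholder names; assumes A is an assembled MATMPIBAIJ
   matrix with b and x conforming vectors): one application of the processor-local
   symmetric SOR sweep implemented above, where the off-process coupling is moved to
   the right-hand side. This is the kind of call PCSOR makes when the matrix is
   MATMPIBAIJ.

     ierr = MatSOR(A,b,1.0,
                   (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,1,1,x);CHKERRQ(ierr);   // omega=1.0, fshift=0.0, its=lits=1
*/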
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2484 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2485 {
2486   PetscErrorCode ierr;
2487   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2488   PetscInt       N,i,*garray = aij->garray;
2489   PetscInt       ib,jb,bs = A->rmap->bs;
2490   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2491   MatScalar      *a_val = a_aij->a;
2492   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2493   MatScalar      *b_val = b_aij->a;
2494   PetscReal      *work;
2495 
2496   PetscFunctionBegin;
2497   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2498   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2499   if (type == NORM_2) {
2500     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2501       for (jb=0; jb<bs; jb++) {
2502         for (ib=0; ib<bs; ib++) {
2503           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2504           a_val++;
2505         }
2506       }
2507     }
2508     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2509       for (jb=0; jb<bs; jb++) {
2510         for (ib=0; ib<bs; ib++) {
2511           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2512           b_val++;
2513         }
2514       }
2515     }
2516   } else if (type == NORM_1) {
2517     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2518       for (jb=0; jb<bs; jb++) {
2519         for (ib=0; ib<bs; ib++) {
2520           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2521           a_val++;
2522         }
2523       }
2524     }
2525     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2526       for (jb=0; jb<bs; jb++) {
2527         for (ib=0; ib<bs; ib++) {
2528           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2529           b_val++;
2530         }
2531       }
2532     }
2533   } else if (type == NORM_INFINITY) {
2534     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2535       for (jb=0; jb<bs; jb++) {
2536         for (ib=0; ib<bs; ib++) {
2537           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2538           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2539           a_val++;
2540         }
2541       }
2542     }
2543     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2544       for (jb=0; jb<bs; jb++) {
2545         for (ib=0; ib<bs; ib++) {
2546           int col = garray[b_aij->j[i]] * bs + jb;
2547           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2548           b_val++;
2549         }
2550       }
2551     }
2552   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2553   if (type == NORM_INFINITY) {
2554     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2555   } else {
2556     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2557   }
2558   ierr = PetscFree(work);CHKERRQ(ierr);
2559   if (type == NORM_2) {
2560     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2561   }
2562   PetscFunctionReturn(0);
2563 }
2564 
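/*
   A minimal usage sketch (placeholder names; assumes A is an assembled MATMPIBAIJ
   matrix): compute the 2-norm of every global column of A. The norms array must
   have room for all N global columns on every process.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/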
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2567 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2568 {
2569   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2570   PetscErrorCode ierr;
2571 
2572   PetscFunctionBegin;
2573   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 
2578 /* -------------------------------------------------------------------*/
2579 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2580                                        MatGetRow_MPIBAIJ,
2581                                        MatRestoreRow_MPIBAIJ,
2582                                        MatMult_MPIBAIJ,
2583                                 /* 4*/ MatMultAdd_MPIBAIJ,
2584                                        MatMultTranspose_MPIBAIJ,
2585                                        MatMultTransposeAdd_MPIBAIJ,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*10*/ 0,
2590                                        0,
2591                                        0,
2592                                        MatSOR_MPIBAIJ,
2593                                        MatTranspose_MPIBAIJ,
2594                                 /*15*/ MatGetInfo_MPIBAIJ,
2595                                        MatEqual_MPIBAIJ,
2596                                        MatGetDiagonal_MPIBAIJ,
2597                                        MatDiagonalScale_MPIBAIJ,
2598                                        MatNorm_MPIBAIJ,
2599                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2600                                        MatAssemblyEnd_MPIBAIJ,
2601                                        MatSetOption_MPIBAIJ,
2602                                        MatZeroEntries_MPIBAIJ,
2603                                 /*24*/ MatZeroRows_MPIBAIJ,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                 /*29*/ MatSetUp_MPIBAIJ,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                 /*34*/ MatDuplicate_MPIBAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*39*/ MatAXPY_MPIBAIJ,
2619                                        MatGetSubMatrices_MPIBAIJ,
2620                                        MatIncreaseOverlap_MPIBAIJ,
2621                                        MatGetValues_MPIBAIJ,
2622                                        MatCopy_MPIBAIJ,
2623                                 /*44*/ 0,
2624                                        MatScale_MPIBAIJ,
2625                                        0,
2626                                        0,
2627                                        MatZeroRowsColumns_MPIBAIJ,
2628                                 /*49*/ 0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2634                                        0,
2635                                        MatSetUnfactored_MPIBAIJ,
2636                                        MatPermute_MPIBAIJ,
2637                                        MatSetValuesBlocked_MPIBAIJ,
2638                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2639                                        MatDestroy_MPIBAIJ,
2640                                        MatView_MPIBAIJ,
2641                                        0,
2642                                        0,
2643                                 /*64*/ 0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*74*/ 0,
2654                                        MatFDColoringApply_BAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*79*/ 0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        MatLoad_MPIBAIJ,
2663                                 /*84*/ 0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                 /*89*/ 0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                 /*94*/ 0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                 /*99*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                 /*104*/0,
2684                                        MatRealPart_MPIBAIJ,
2685                                        MatImaginaryPart_MPIBAIJ,
2686                                        0,
2687                                        0,
2688                                 /*109*/0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2694                                        0,
2695                                        MatGetGhosts_MPIBAIJ,
2696                                        0,
2697                                        0,
2698                                 /*119*/0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        MatGetMultiProcBlock_MPIBAIJ,
2703                                 /*124*/0,
2704                                        MatGetColumnNorms_MPIBAIJ,
2705                                        MatInvertBlockDiagonal_MPIBAIJ,
2706                                        0,
2707                                        0,
2708                                /*129*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                /*134*/ 0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /*139*/ 0,
2719                                        0,
2720                                        0,
2721                                        MatFDColoringSetUp_MPIXAIJ,
2722                                        0,
2723                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2724 };
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2728 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2729 {
2730   PetscFunctionBegin;
2731   *a = ((Mat_MPIBAIJ*)A->data)->A;
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2736 
2737 #undef __FUNCT__
2738 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2739 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2740 {
2741   PetscInt       m,rstart,cstart,cend;
2742   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2743   const PetscInt *JJ    =0;
2744   PetscScalar    *values=0;
2745   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2746   PetscErrorCode ierr;
2747 
2748   PetscFunctionBegin;
2749   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2750   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2751   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2752   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2753   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2754   m      = B->rmap->n/bs;
2755   rstart = B->rmap->rstart/bs;
2756   cstart = B->cmap->rstart/bs;
2757   cend   = B->cmap->rend/bs;
2758 
2759   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2760   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2761   for (i=0; i<m; i++) {
2762     nz = ii[i+1] - ii[i];
2763     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2764     nz_max = PetscMax(nz_max,nz);
2765     JJ     = jj + ii[i];
2766     for (j=0; j<nz; j++) {
2767       if (*JJ >= cstart) break;
2768       JJ++;
2769     }
2770     d = 0;
2771     for (; j<nz; j++) {
2772       if (*JJ++ >= cend) break;
2773       d++;
2774     }
2775     d_nnz[i] = d;
2776     o_nnz[i] = nz - d;
2777   }
2778   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2779   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2780 
2781   values = (PetscScalar*)V;
2782   if (!values) {
2783     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2784     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2785   }
2786   for (i=0; i<m; i++) {
2787     PetscInt          row    = i + rstart;
2788     PetscInt          ncols  = ii[i+1] - ii[i];
2789     const PetscInt    *icols = jj + ii[i];
2790     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2791       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2792       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2793     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2794       PetscInt j;
2795       for (j=0; j<ncols; j++) {
2796         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2797         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2798       }
2799     }
2800   }
2801 
2802   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2803   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2804   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2805   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 #undef __FUNCT__
2810 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2811 /*@C
2812    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2813    (the default parallel PETSc format).
2814 
2815    Collective on MPI_Comm
2816 
2817    Input Parameters:
2818 +  B - the matrix
2819 .  bs - the block size
2820 .  i - the indices into j for the start of each local row (starts with zero)
2821 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2822 -  v - optional values in the matrix
2823 
2824    Level: developer
2825 
2826    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2827    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2828    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2829    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2830    block column and the second index is over columns within a block.
2831 
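   Example: a minimal sketch (not taken from a PETSc example); the indices and 2x2 block values are
   purely illustrative and assume one process owning two block rows of B, which must already have
   its local sizes set.
.vb
   PetscInt    bs   = 2;
   PetscInt    ii[] = {0,1,3};         /* block row 0 holds 1 block, block row 1 holds 2 blocks */
   PetscInt    jj[] = {0,0,1};         /* block column indices, sorted within each block row    */
   PetscScalar vv[] = {1,2,3,4,        /* entries of the first 2x2 block (row oriented)         */
                       5,6,7,8,        /* entries of the second 2x2 block                       */
                       9,10,11,12};    /* entries of the third 2x2 block                        */
   ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
.ve
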
2832 .keywords: matrix, aij, compressed row, sparse, parallel
2833 
2834 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2835 @*/
2836 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2837 {
2838   PetscErrorCode ierr;
2839 
2840   PetscFunctionBegin;
2841   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2842   PetscValidType(B,1);
2843   PetscValidLogicalCollectiveInt(B,bs,2);
2844   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 #undef __FUNCT__
2849 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2850 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2851 {
2852   Mat_MPIBAIJ    *b;
2853   PetscErrorCode ierr;
2854   PetscInt       i;
2855 
2856   PetscFunctionBegin;
2857   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2858   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2859   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2860   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2861 
2862   if (d_nnz) {
2863     for (i=0; i<B->rmap->n/bs; i++) {
2864       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2865     }
2866   }
2867   if (o_nnz) {
2868     for (i=0; i<B->rmap->n/bs; i++) {
2869       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2870     }
2871   }
2872 
2873   b      = (Mat_MPIBAIJ*)B->data;
2874   b->bs2 = bs*bs;
2875   b->mbs = B->rmap->n/bs;
2876   b->nbs = B->cmap->n/bs;
2877   b->Mbs = B->rmap->N/bs;
2878   b->Nbs = B->cmap->N/bs;
2879 
2880   for (i=0; i<=b->size; i++) {
2881     b->rangebs[i] = B->rmap->range[i]/bs;
2882   }
2883   b->rstartbs = B->rmap->rstart/bs;
2884   b->rendbs   = B->rmap->rend/bs;
2885   b->cstartbs = B->cmap->rstart/bs;
2886   b->cendbs   = B->cmap->rend/bs;
2887 
2888   if (!B->preallocated) {
2889     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2890     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2891     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2892     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2893     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2894     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2895     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2896     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2897     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2898   }
2899 
2900   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2901   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2902   B->preallocated = PETSC_TRUE;
2903   PetscFunctionReturn(0);
2904 }
2905 
2906 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2907 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2908 
2909 #undef __FUNCT__
2910 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2911 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2912 {
2913   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2914   PetscErrorCode ierr;
2915   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2916   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2917   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2918 
2919   PetscFunctionBegin;
2920   ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
2921   ii[0] = 0;
2922   for (i=0; i<M; i++) {
2923     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2924     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2925     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2926     /* remove one from the count if the matrix row has a diagonal entry */
2927     for (j=id[i]; j<id[i+1]; j++) {
2928       if (jd[j] == i) {ii[i+1]--;break;}
2929     }
2930   }
2931   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2932   cnt  = 0;
2933   for (i=0; i<M; i++) {
2934     for (j=io[i]; j<io[i+1]; j++) {
2935       if (garray[jo[j]] > rstart) break;
2936       jj[cnt++] = garray[jo[j]];
2937     }
2938     for (k=id[i]; k<id[i+1]; k++) {
2939       if (jd[k] != i) {
2940         jj[cnt++] = rstart + jd[k];
2941       }
2942     }
2943     for (; j<io[i+1]; j++) {
2944       jj[cnt++] = garray[jo[j]];
2945     }
2946   }
2947   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2952 
2953 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2954 
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2957 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2958 {
2959   PetscErrorCode ierr;
2960   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2961   Mat            B;
2962   Mat_MPIAIJ     *b;
2963 
2964   PetscFunctionBegin;
2965   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2966 
2967   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2968   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2969   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2970   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2971   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2972   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2973   b    = (Mat_MPIAIJ*) B->data;
2974 
2975   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2976   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2977   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2978   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2979   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2980   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2981   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2982   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2983   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2984   if (reuse == MAT_REUSE_MATRIX) {
2985     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2986   } else {
2987    *newmat = B;
2988   }
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 /*MC
2993    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2994 
2995    Options Database Keys:
2996 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2997 . -mat_block_size <bs> - set the blocksize used to store the matrix
2998 - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
2999 
3000   Level: beginner
3001 
3002 .seealso: MatCreateMPIBAIJ
3003 M*/
3004 
3005 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3006 
3007 #undef __FUNCT__
3008 #define __FUNCT__ "MatCreate_MPIBAIJ"
3009 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3010 {
3011   Mat_MPIBAIJ    *b;
3012   PetscErrorCode ierr;
3013   PetscBool      flg = PETSC_FALSE;
3014 
3015   PetscFunctionBegin;
3016   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3017   B->data = (void*)b;
3018 
3019   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3020   B->assembled = PETSC_FALSE;
3021 
3022   B->insertmode = NOT_SET_VALUES;
3023   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3024   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3025 
3026   /* build local table of row and column ownerships */
3027   ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);
3028 
3029   /* build cache for off array entries formed */
3030   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3031 
3032   b->donotstash  = PETSC_FALSE;
3033   b->colmap      = NULL;
3034   b->garray      = NULL;
3035   b->roworiented = PETSC_TRUE;
3036 
3037   /* stuff used in block assembly */
3038   b->barray = 0;
3039 
3040   /* stuff used for matrix vector multiply */
3041   b->lvec  = 0;
3042   b->Mvctx = 0;
3043 
3044   /* stuff for MatGetRow() */
3045   b->rowindices   = 0;
3046   b->rowvalues    = 0;
3047   b->getrowactive = PETSC_FALSE;
3048 
3049   /* hash table stuff */
3050   b->ht           = 0;
3051   b->hd           = 0;
3052   b->ht_size      = 0;
3053   b->ht_flag      = PETSC_FALSE;
3054   b->ht_fact      = 0;
3055   b->ht_total_ct  = 0;
3056   b->ht_insert_ct = 0;
3057 
3058   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3059   b->ijonly = PETSC_FALSE;
3060 
3061 
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3069   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3072   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3073   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3074 
3075   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3076   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
3077   if (flg) {
3078     PetscReal fact = 1.39;
3079     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3080     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3081     if (fact <= 1.0) fact = 1.39;
3082     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3083     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3084   }
3085   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3086   PetscFunctionReturn(0);
3087 }
3088 
3089 /*MC
3090    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3091 
3092    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3093    and MATMPIBAIJ otherwise.
3094 
3095    Options Database Keys:
3096 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3097 
3098   Level: beginner
3099 
3100 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3101 M*/
3102 
3103 #undef __FUNCT__
3104 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3105 /*@C
3106    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3107    (block compressed row).  For good matrix assembly performance
3108    the user should preallocate the matrix storage by setting the parameters
3109    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3110    performance can be increased by more than a factor of 50.
3111 
3112    Collective on Mat
3113 
3114    Input Parameters:
3115 +  B - the matrix
3116 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3117           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3118 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3119            submatrix  (same for all local rows)
3120 .  d_nnz - array containing the number of block nonzeros in the various block rows
3121            of the in diagonal portion of the local (possibly different for each block
3122            in the diagonal portion of the local submatrix (possibly different for each block
3123            set it even if it is zero.
3124 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3125            submatrix (same for all local rows).
3126 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3127            off-diagonal portion of the local submatrix (possibly different for
3128            each block row) or NULL.
3129 
3130    If the *_nnz parameter is given then the *_nz parameter is ignored
3131 
3132    Options Database Keys:
3133 +   -mat_block_size - size of the blocks to use
3134 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
3135 
3136    Notes:
3137    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3138    then it must be used on all processors that share the object for that argument.
3139 
3140    Storage Information:
3141    For a square global matrix we define each processor's diagonal portion
3142    to be its local rows and the corresponding columns (a square submatrix);
3143    each processor's off-diagonal portion encompasses the remainder of the
3144    local matrix (a rectangular submatrix).
3145 
3146    The user can specify preallocated storage for the diagonal part of
3147    the local submatrix with either d_nz or d_nnz (not both).  Set
3148    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3149    memory allocation.  Likewise, specify preallocated storage for the
3150    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3151 
3152    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3153    the figure below we depict these three local rows and all columns (0-11).
3154 
3155 .vb
3156            0 1 2 3 4 5 6 7 8 9 10 11
3157           --------------------------
3158    row 3  |o o o d d d o o o o  o  o
3159    row 4  |o o o d d d o o o o  o  o
3160    row 5  |o o o d d d o o o o  o  o
3161           --------------------------
3162 .ve
3163 
3164    Thus, any entries in the d locations are stored in the d (diagonal)
3165    submatrix, and any entries in the o locations are stored in the
3166    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3167    stored simply in the MATSEQBAIJ format for compressed row storage.
3168 
3169    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3170    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3171    In general, for PDE problems in which most nonzeros are near the diagonal,
3172    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3173    or you will get TERRIBLE performance; see the users' manual chapter on
3174    matrices.
3175 
3176    You can call MatGetInfo() to get information on how effective the preallocation was;
3177    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3178    You can also run with the option -info and look for messages with the string
3179    malloc in them to see if additional memory allocation was needed.
3180 
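   Example: a sketch based on the figure above, assuming bs=3 so that rows 3-5 form one local block
   row; its diagonal (d) portion then holds 1 block and its off-diagonal (o) portion holds 3 blocks
   (block columns 0, 2 and 3):
.vb
   PetscInt d_nnz[] = {1}, o_nnz[] = {3};
   ierr = MatMPIBAIJSetPreallocation(B,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
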
3181    Level: intermediate
3182 
3183 .keywords: matrix, block, aij, compressed row, sparse, parallel
3184 
3185 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3186 @*/
3187 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3188 {
3189   PetscErrorCode ierr;
3190 
3191   PetscFunctionBegin;
3192   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3193   PetscValidType(B,1);
3194   PetscValidLogicalCollectiveInt(B,bs,2);
3195   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3196   PetscFunctionReturn(0);
3197 }
3198 
3199 #undef __FUNCT__
3200 #define __FUNCT__ "MatCreateBAIJ"
3201 /*@C
3202    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3203    (block compressed row).  For good matrix assembly performance
3204    the user should preallocate the matrix storage by setting the parameters
3205    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3206    performance can be increased by more than a factor of 50.
3207 
3208    Collective on MPI_Comm
3209 
3210    Input Parameters:
3211 +  comm - MPI communicator
3212 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3213           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3214 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3215            This value should be the same as the local size used in creating the
3216            y vector for the matrix-vector product y = Ax.
3217 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3218            This value should be the same as the local size used in creating the
3219            x vector for the matrix-vector product y = Ax.
3220 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3221 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3222 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3223            submatrix  (same for all local rows)
3224 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3225            in the diagonal portion of the local submatrix (possibly different for each block
3226            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3227            and set it even if it is zero.
3228 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3229            submatrix (same for all local rows).
3230 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3231            off-diagonal portion of the local submatrix (possibly different for
3232            each block row) or NULL.
3233 
3234    Output Parameter:
3235 .  A - the matrix
3236 
3237    Options Database Keys:
3238 +   -mat_block_size - size of the blocks to use
3239 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
3240 
3241    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3242    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3243    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3244 
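   A minimal sketch of that recommended paradigm (the sizes, block size and preallocation counts are
   placeholders); only the preallocation call matching the actual matrix type takes effect:
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
   ierr = MatSeqBAIJSetPreallocation(A,bs,d_nz,d_nnz);CHKERRQ(ierr);
.ve
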
3245    Notes:
3246    If the *_nnz parameter is given then the *_nz parameter is ignored
3247 
3248    A nonzero block is any block that has 1 or more nonzeros in it
3249 
3250    The user MUST specify either the local or global matrix dimensions
3251    (possibly both).
3252 
3253    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3254    then it must be used on all processors that share the object for that argument.
3255 
3256    Storage Information:
3257    For a square global matrix we define each processor's diagonal portion
3258    to be its local rows and the corresponding columns (a square submatrix);
3259    each processor's off-diagonal portion encompasses the remainder of the
3260    local matrix (a rectangular submatrix).
3261 
3262    The user can specify preallocated storage for the diagonal part of
3263    the local submatrix with either d_nz or d_nnz (not both).  Set
3264    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3265    memory allocation.  Likewise, specify preallocated storage for the
3266    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3267 
3268    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3269    the figure below we depict these three local rows and all columns (0-11).
3270 
3271 .vb
3272            0 1 2 3 4 5 6 7 8 9 10 11
3273           --------------------------
3274    row 3  |o o o d d d o o o o  o  o
3275    row 4  |o o o d d d o o o o  o  o
3276    row 5  |o o o d d d o o o o  o  o
3277           --------------------------
3278 .ve
3279 
3280    Thus, any entries in the d locations are stored in the d (diagonal)
3281    submatrix, and any entries in the o locations are stored in the
3282    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3283    stored simply in the MATSEQBAIJ format for compressed row storage.
3284 
3285    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3286    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3287    In general, for PDE problems in which most nonzeros are near the diagonal,
3288    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3289    or you will get TERRIBLE performance; see the users' manual chapter on
3290    matrices.
3291 
3292    Level: intermediate
3293 
3294 .keywords: matrix, block, aij, compressed row, sparse, parallel
3295 
3296 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3297 @*/
3298 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3299 {
3300   PetscErrorCode ierr;
3301   PetscMPIInt    size;
3302 
3303   PetscFunctionBegin;
3304   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3305   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3306   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3307   if (size > 1) {
3308     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3309     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3310   } else {
3311     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3312     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3313   }
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3319 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3320 {
3321   Mat            mat;
3322   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3323   PetscErrorCode ierr;
3324   PetscInt       len=0;
3325 
3326   PetscFunctionBegin;
3327   *newmat = 0;
3328   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3329   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3330   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3331   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3332 
3333   mat->factortype   = matin->factortype;
3334   mat->preallocated = PETSC_TRUE;
3335   mat->assembled    = PETSC_TRUE;
3336   mat->insertmode   = NOT_SET_VALUES;
3337 
3338   a             = (Mat_MPIBAIJ*)mat->data;
3339   mat->rmap->bs = matin->rmap->bs;
3340   a->bs2        = oldmat->bs2;
3341   a->mbs        = oldmat->mbs;
3342   a->nbs        = oldmat->nbs;
3343   a->Mbs        = oldmat->Mbs;
3344   a->Nbs        = oldmat->Nbs;
3345 
3346   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3347   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3348 
3349   a->size         = oldmat->size;
3350   a->rank         = oldmat->rank;
3351   a->donotstash   = oldmat->donotstash;
3352   a->roworiented  = oldmat->roworiented;
3353   a->rowindices   = 0;
3354   a->rowvalues    = 0;
3355   a->getrowactive = PETSC_FALSE;
3356   a->barray       = 0;
3357   a->rstartbs     = oldmat->rstartbs;
3358   a->rendbs       = oldmat->rendbs;
3359   a->cstartbs     = oldmat->cstartbs;
3360   a->cendbs       = oldmat->cendbs;
3361 
3362   /* hash table stuff */
3363   a->ht           = 0;
3364   a->hd           = 0;
3365   a->ht_size      = 0;
3366   a->ht_flag      = oldmat->ht_flag;
3367   a->ht_fact      = oldmat->ht_fact;
3368   a->ht_total_ct  = 0;
3369   a->ht_insert_ct = 0;
3370 
3371   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3372   if (oldmat->colmap) {
3373 #if defined(PETSC_USE_CTABLE)
3374     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3375 #else
3376     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
3377     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3378     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3379 #endif
3380   } else a->colmap = 0;
3381 
3382   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3383     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3384     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3385     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3386   } else a->garray = 0;
3387 
3388   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3389   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3390   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3391   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3392   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3393 
3394   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3395   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3396   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3397   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3398   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3399   *newmat = mat;
3400   PetscFunctionReturn(0);
3401 }
3402 
3403 #undef __FUNCT__
3404 #define __FUNCT__ "MatLoad_MPIBAIJ"
3405 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3406 {
3407   PetscErrorCode ierr;
3408   int            fd;
3409   PetscInt       i,nz,j,rstart,rend;
3410   PetscScalar    *vals,*buf;
3411   MPI_Comm       comm;
3412   MPI_Status     status;
3413   PetscMPIInt    rank,size,maxnz;
3414   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3415   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3416   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3417   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3418   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3419   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3420 
3421   PetscFunctionBegin;
3422   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3423   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3424   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3425   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3426   if (bs < 0) bs = 1;
3427 
3428   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3429   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3430   if (!rank) {
3431     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3432     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3433     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3434   }
3435 
3436   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3437 
3438   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3439   M    = header[1]; N = header[2];
3440 
3441   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3442   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3443   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3444 
3445   /* If global sizes are set, check if they are consistent with that given in the file */
3446   if (sizesset) {
3447     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3448   }
3449   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3450   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3451 
3452   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3453 
3454   /*
3455      This code adds extra rows to make sure the number of rows is
3456      divisible by the blocksize
3457   */
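  /*
     For illustration only (the numbers are not taken from any file): with M = 10 and bs = 4 this
     gives Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 4*2 = 2; since extra_rows != bs, Mbs is bumped
     to 3 and the matrix is padded with 2 extra rows/columns so the padded size 12 is divisible by bs.
  */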
3458   Mbs        = M/bs;
3459   extra_rows = bs - M + bs*Mbs;
3460   if (extra_rows == bs) extra_rows = 0;
3461   else                  Mbs++;
3462   if (extra_rows && !rank) {
3463     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3464   }
3465 
3466   /* determine ownership of all rows */
3467   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3468     mbs = Mbs/size + ((Mbs % size) > rank);
3469     m   = mbs*bs;
3470   } else { /* User set */
3471     m   = newmat->rmap->n;
3472     mbs = m/bs;
3473   }
3474   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3475   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3476 
3477   /* process 0 needs enough room for process with most rows */
3478   if (!rank) {
3479     mmax = rowners[1];
3480     for (i=2; i<=size; i++) {
3481       mmax = PetscMax(mmax,rowners[i]);
3482     }
3483     mmax*=bs;
3484   } else mmax = -1;             /* unused, but compiler warns anyway */
3485 
3486   rowners[0] = 0;
3487   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3488   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3489   rstart = rowners[rank];
3490   rend   = rowners[rank+1];
3491 
3492   /* distribute row lengths to all processors */
3493   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3494   if (!rank) {
3495     mend = m;
3496     if (size == 1) mend = mend - extra_rows;
3497     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3498     for (j=mend; j<m; j++) locrowlens[j] = 1;
3499     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3500     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3501     for (j=0; j<m; j++) {
3502       procsnz[0] += locrowlens[j];
3503     }
3504     for (i=1; i<size; i++) {
3505       mend = browners[i+1] - browners[i];
3506       if (i == size-1) mend = mend - extra_rows;
3507       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3508       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3509       /* calculate the number of nonzeros on each processor */
3510       for (j=0; j<browners[i+1]-browners[i]; j++) {
3511         procsnz[i] += rowlengths[j];
3512       }
3513       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3514     }
3515     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3516   } else {
3517     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3518   }
3519 
3520   if (!rank) {
3521     /* determine max buffer needed and allocate it */
3522     maxnz = procsnz[0];
3523     for (i=1; i<size; i++) {
3524       maxnz = PetscMax(maxnz,procsnz[i]);
3525     }
3526     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3527 
3528     /* read in my part of the matrix column indices  */
3529     nz     = procsnz[0];
3530     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3531     mycols = ibuf;
3532     if (size == 1) nz -= extra_rows;
3533     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3534     if (size == 1) {
3535       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3536     }
3537 
3538     /* read in each of the other processes' parts (except the last) and ship them off */
3539     for (i=1; i<size-1; i++) {
3540       nz   = procsnz[i];
3541       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3542       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3543     }
3544     /* read in the stuff for the last proc */
3545     if (size != 1) {
3546       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3547       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3548       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3549       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3550     }
3551     ierr = PetscFree(cols);CHKERRQ(ierr);
3552   } else {
3553     /* determine buffer space needed for message */
3554     nz = 0;
3555     for (i=0; i<m; i++) {
3556       nz += locrowlens[i];
3557     }
3558     ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
3559     mycols = ibuf;
3560     /* receive message of column indices*/
3561     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3562     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3563     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3564   }
3565 
3566   /* loop over local rows, determining number of off diagonal entries */
3567   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3568   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3569   rowcount = 0; nzcount = 0;
3570   for (i=0; i<mbs; i++) {
3571     dcount  = 0;
3572     odcount = 0;
3573     for (j=0; j<bs; j++) {
3574       kmax = locrowlens[rowcount];
3575       for (k=0; k<kmax; k++) {
3576         tmp = mycols[nzcount++]/bs;
3577         if (!mask[tmp]) {
3578           mask[tmp] = 1;
3579           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3580           else masked1[dcount++] = tmp;
3581         }
3582       }
3583       rowcount++;
3584     }
3585 
3586     dlens[i]  = dcount;
3587     odlens[i] = odcount;
3588 
3589     /* zero out the mask elements we set */
3590     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3591     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3592   }
3593 
3594 
3595   if (!sizesset) {
3596     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3597   }
3598   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3599 
3600   if (!rank) {
3601     ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
3602     /* read in my part of the matrix numerical values  */
3603     nz     = procsnz[0];
3604     vals   = buf;
3605     mycols = ibuf;
3606     if (size == 1) nz -= extra_rows;
3607     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3608     if (size == 1) {
3609       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3610     }
3611 
3612     /* insert into matrix */
3613     jj = rstart*bs;
3614     for (i=0; i<m; i++) {
3615       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3616       mycols += locrowlens[i];
3617       vals   += locrowlens[i];
3618       jj++;
3619     }
3620     /* read in other processors (except the last one) and ship out */
3621     for (i=1; i<size-1; i++) {
3622       nz   = procsnz[i];
3623       vals = buf;
3624       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3625       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3626     }
3627     /* the last proc */
3628     if (size != 1) {
3629       nz   = procsnz[i] - extra_rows;
3630       vals = buf;
3631       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3632       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3633       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3634     }
3635     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3636   } else {
3637     /* receive numeric values */
3638     ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);
3639 
3640     /* receive message of values*/
3641     vals   = buf;
3642     mycols = ibuf;
3643     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3644 
3645     /* insert into matrix */
3646     jj = rstart*bs;
3647     for (i=0; i<m; i++) {
3648       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3649       mycols += locrowlens[i];
3650       vals   += locrowlens[i];
3651       jj++;
3652     }
3653   }
3654   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3655   ierr = PetscFree(buf);CHKERRQ(ierr);
3656   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3657   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3658   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3659   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3660   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3661   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3662   PetscFunctionReturn(0);
3663 }
3664 
3665 #undef __FUNCT__
3666 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3667 /*@
3668    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3669 
3670    Input Parameters:
3671 .  mat  - the matrix
3672 .  fact - factor
3673 
3674    Not Collective, each process can use a different factor
3675 
3676    Level: advanced
3677 
3678   Notes:
3679    This can also be set by the command line option: -mat_use_hash_table <fact>
3680 
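   Example: a minimal sketch (the factor 1.6 is only an illustrative value):
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve
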
3681 .keywords: matrix, hashtable, factor, HT
3682 
3683 .seealso: MatSetOption()
3684 @*/
3685 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3686 {
3687   PetscErrorCode ierr;
3688 
3689   PetscFunctionBegin;
3690   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3691   PetscFunctionReturn(0);
3692 }
3693 
3694 #undef __FUNCT__
3695 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3696 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3697 {
3698   Mat_MPIBAIJ *baij;
3699 
3700   PetscFunctionBegin;
3701   baij          = (Mat_MPIBAIJ*)mat->data;
3702   baij->ht_fact = fact;
3703   PetscFunctionReturn(0);
3704 }
3705 
3706 #undef __FUNCT__
3707 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3708 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3709 {
3710   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3711 
3712   PetscFunctionBegin;
3713   if (Ad)     *Ad     = a->A;
3714   if (Ao)     *Ao     = a->B;
3715   if (colmap) *colmap = a->garray;
3716   PetscFunctionReturn(0);
3717 }
3718 
3719 /*
3720     Special version for direct calls from Fortran (to eliminate two function call overheads)
3721 */
3722 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3723 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3724 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3725 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3726 #endif
3727 
3728 #undef __FUNCT__
3729 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3730 /*@C
3731   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3732 
3733   Collective on Mat
3734 
3735   Input Parameters:
3736 + mat - the matrix
3737 . min - number of input rows
3738 . im - input rows
3739 . nin - number of input columns
3740 . in - input columns
3741 . v - numerical values input
3742 - addvin - INSERT_VALUES or ADD_VALUES
3743 
3744   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
3745 
3746   Level: advanced
3747 
3748 .seealso:   MatSetValuesBlocked()
3749 @*/
3750 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3751 {
3752   /* convert input arguments to C version */
3753   Mat        mat  = *matin;
3754   PetscInt   m    = *min, n = *nin;
3755   InsertMode addv = *addvin;
3756 
3757   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3758   const MatScalar *value;
3759   MatScalar       *barray     = baij->barray;
3760   PetscBool       roworiented = baij->roworiented;
3761   PetscErrorCode  ierr;
3762   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3763   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3764   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3765 
3766   PetscFunctionBegin;
3767   /* tasks normally handled by MatSetValuesBlocked() */
3768   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3769 #if defined(PETSC_USE_DEBUG)
3770   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3771   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3772 #endif
3773   if (mat->assembled) {
3774     mat->was_assembled = PETSC_TRUE;
3775     mat->assembled     = PETSC_FALSE;
3776   }
3777   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3778 
3779 
3780   if (!barray) {
3781     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3782     baij->barray = barray;
3783   }
3784 
3785   if (roworiented) stepval = (n-1)*bs;
3786   else stepval = (m-1)*bs;
3787 
3788   for (i=0; i<m; i++) {
3789     if (im[i] < 0) continue;
3790 #if defined(PETSC_USE_DEBUG)
3791     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3792 #endif
3793     if (im[i] >= rstart && im[i] < rend) {
3794       row = im[i] - rstart;
3795       for (j=0; j<n; j++) {
3796         /* If NumCol = 1 then a copy is not required */
3797         if ((roworiented) && (n == 1)) {
3798           barray = (MatScalar*)v + i*bs2;
3799         } else if ((!roworiented) && (m == 1)) {
3800           barray = (MatScalar*)v + j*bs2;
3801         } else { /* Here a copy is required */
3802           if (roworiented) {
3803             value = v + i*(stepval+bs)*bs + j*bs;
3804           } else {
3805             value = v + j*(stepval+bs)*bs + i*bs;
3806           }
3807           for (ii=0; ii<bs; ii++,value+=stepval) {
3808             for (jj=0; jj<bs; jj++) {
3809               *barray++ = *value++;
3810             }
3811           }
3812           barray -=bs2;
3813         }
3814 
3815         if (in[j] >= cstart && in[j] < cend) {
3816           col  = in[j] - cstart;
3817           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3818         } else if (in[j] < 0) continue;
3819 #if defined(PETSC_USE_DEBUG)
3820         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3821 #endif
3822         else {
3823           if (mat->was_assembled) {
3824             if (!baij->colmap) {
3825               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3826             }
3827 
3828 #if defined(PETSC_USE_DEBUG)
3829 #if defined(PETSC_USE_CTABLE)
3830             { PetscInt data;
3831               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3832               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3833             }
3834 #else
3835             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3836 #endif
3837 #endif
3838 #if defined(PETSC_USE_CTABLE)
3839             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3840             col  = (col - 1)/bs;
3841 #else
3842             col = (baij->colmap[in[j]] - 1)/bs;
3843 #endif
3844             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3845               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3846               col  =  in[j];
3847             }
3848           } else col = in[j];
3849           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3850         }
3851       }
3852     } else {
3853       if (!baij->donotstash) {
3854         if (roworiented) {
3855           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3856         } else {
3857           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3858         }
3859       }
3860     }
3861   }
3862 
3863   /* task normally handled by MatSetValuesBlocked() */
3864   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3865   PetscFunctionReturn(0);
3866 }
3867 
3868 #undef __FUNCT__
3869 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3870 /*@
3871      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3872          CSR format the local rows.
3873 
3874    Collective on MPI_Comm
3875 
3876    Input Parameters:
3877 +  comm - MPI communicator
3878 .  bs - the block size, only a block size of 1 is supported
3879 .  m - number of local rows (Cannot be PETSC_DECIDE)
3880 .  n - This value should be the same as the local size used in creating the
3881        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3882        calculated if N is given). For square matrices n is almost always m.
3883 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3884 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3885 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of entries in that row of the matrix
3886 .   j - column indices
3887 -   a - matrix values
3888 
3889    Output Parameter:
3890 .   mat - the matrix
3891 
3892    Level: intermediate
3893 
3894    Notes:
3895        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3896      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3897      called this routine. Use MatCreateMPIAIJWithSplitArrays() (which creates an AIJ, not a BAIJ, matrix) if you need to avoid copying the arrays.
3898 
3899      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3900      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3901      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3902      with column-major ordering within blocks.
3903 
3904        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
3905 
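       A minimal usage sketch (illustrative values only): on a single MPI rank the call below builds the
     2 by 2 matrix with rows (2 -1) and (0 2), using bs = 1, m = n = 2, two entries in row 0 and one
     entry in row 1.
.vb
     PetscInt    i[] = {0,2,3};
     PetscInt    j[] = {0,1,1};
     PetscScalar a[] = {2.0,-1.0,2.0};
     Mat         A;

     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve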
3906 .keywords: matrix, baij, block compressed row, sparse, parallel
3907 
3908 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
3909           MATMPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays()
3910 @*/
3911 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3912 {
3913   PetscErrorCode ierr;
3914 
3915   PetscFunctionBegin;
3916   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3917   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3918   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3919   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3920   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3921   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3922   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3923   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3924   PetscFunctionReturn(0);
3925 }
3926 
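/*
    Stacks the sequential BAIJ matrices owned by the processes of comm on top of one another into a
  single parallel BAIJ matrix: the symbolic phase (MAT_INITIAL_MATRIX) determines the block row
  ownership and preallocation of the result, and the numeric phase copies the values row by row.
*/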
3927 #undef __FUNCT__
3928 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
3929 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3930 {
3931   PetscErrorCode ierr;
3932   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3933   PetscInt       *indx;
3934   PetscScalar    *values;
3935 
3936   PetscFunctionBegin;
3937   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3938   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3939     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3940     PetscInt       *dnz,*onz,sum,mbs,Nbs;
3941     PetscInt       *bindx,rmax=a->rmax,j;
3942 
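    /* Count, for each local block row of inmat, how many block nonzeros fall in the diagonal (dnz) and
       off-diagonal (onz) portions of the parallel matrix, then preallocate the new matrix with them */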
3943     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3944     mbs = m/bs; Nbs = N/cbs;
3945     if (n == PETSC_DECIDE) {
3946       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3947     }
3948     /* Check sum(n) = Nbs */
3949     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3950     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);
3951 
3952     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3953     rstart -= mbs;
3954 
3955     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3956     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3957     for (i=0; i<mbs; i++) {
3958       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3959       nnz = nnz/bs;
3960       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3961       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3962       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3963     }
3964     ierr = PetscFree(bindx);CHKERRQ(ierr);
3965 
3966     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3967     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3968     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3969     ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
3970     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3971     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3972   }
3973 
3974   /* numeric phase */
3975   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3976   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3977 
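  /* Copy each local row of inmat into the corresponding global row of the parallel matrix; note that
     MatGetRow_SeqBAIJ() returns non-blocked column indices and values, hence the non-blocked MatSetValues() */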
3978   for (i=0; i<m; i++) {
3979     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3980     Ii   = i + rstart;
3981     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3982     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3983   }
3984   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3985   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3986   PetscFunctionReturn(0);
3987 }
3988
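
/*
    Usage sketch (illustrative only): callers normally reach the routine above through the
  type-independent MatCreateMPIMatConcatenateSeqMat() interface rather than calling it directly.
  Assuming every rank holds a MATSEQBAIJ matrix seqmat with the same block size and global column
  layout, one might write:

      Mat mpimat;
      ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ierr = MatDestroy(&mpimat);CHKERRQ(ierr);
*/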