xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c5b2ea42cd4070151795b0df3c8c47a133418b2b)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non-scalable way since the
81    length of colmap equals the global number of block columns.
82 */
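/*
   A hedged usage sketch (not compiled, error checking omitted): how the colmap built
   below is consulted when a column landing in the off-diagonal part is encountered;
   "gcol" is a hypothetical global (point) column index.  Values are stored shifted
   by +1 so that 0 can mean "block column not present".
*/
#if 0
static PetscInt ExampleColmapLookup(Mat_MPIBAIJ *baij,PetscInt gcol,PetscInt bs)
{
  PetscInt col;
#if defined(PETSC_USE_CTABLE)
  PetscTableFind(baij->colmap,gcol/bs + 1,&col);   /* col is 0 if the block column is absent */
  col = col - 1;
#else
  col = baij->colmap[gcol/bs] - 1;
#endif
  if (col >= 0) col += gcol % bs;                  /* add the offset within the block */
  return col;                                      /* negative => not in the off-diagonal part */
}
#endif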
83 #undef __FUNCT__
84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
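/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) below inserts (or adds) one scalar
   into the diagonal block A of the local storage: the target block column is located
   with a short binary search followed by a linear scan of the block row, and if the
   block is not yet present (and new nonzeros are allowed) the later blocks of the row
   are shifted up one slot before the value is written.
   MatSetValues_SeqBAIJ_B_Private() does the same for the off-diagonal block B.
*/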
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some variables required by the MatSetValues_SeqBAIJ_*_Private() macros */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else col += in[j]%bs;
252           } else col = in[j];
253           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260       if (!baij->donotstash) {
261         if (roworiented) {
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
263         } else {
264           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
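/*
   A hedged usage sketch (not compiled): MatSetValues() on a MATMPIBAIJ matrix takes
   ordinary (point) global indices; locally owned rows are handled by the routine above,
   while off-process rows are stashed and communicated during assembly.  The indices and
   values below are hypothetical.
*/
#if 0
  PetscInt    row = 0, col = 0;
  PetscScalar val = 1.0;
  ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif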
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
275 {
276   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
277   const PetscScalar *value;
278   MatScalar         *barray=baij->barray;
279   PetscBool         roworiented = baij->roworiented;
280   PetscErrorCode    ierr;
281   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
282   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
283   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
284 
285   PetscFunctionBegin;
286   if(!barray) {
287     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
288     baij->barray = barray;
289   }
290 
291   if (roworiented) {
292     stepval = (n-1)*bs;
293   } else {
294     stepval = (m-1)*bs;
295   }
296   for (i=0; i<m; i++) {
297     if (im[i] < 0) continue;
298 #if defined(PETSC_USE_DEBUG)
299     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
300 #endif
301     if (im[i] >= rstart && im[i] < rend) {
302       row = im[i] - rstart;
303       for (j=0; j<n; j++) {
304         /* If a single block column (row-oriented) or a single block row (column-oriented) is passed, no copy is required */
305         if ((roworiented) && (n == 1)) {
306           barray = (MatScalar*)v + i*bs2;
307         } else if((!roworiented) && (m == 1)) {
308           barray = (MatScalar*)v + j*bs2;
309         } else { /* Here a copy is required */
310           if (roworiented) {
311             value = v + (i*(stepval+bs) + j)*bs;
312           } else {
313 	    value = v + (j*(stepval+bs) + i)*bs;
314           }
315           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
316             for (jj=0; jj<bs; jj++) {
317               barray[jj]  = value[jj];
318             }
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend){
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         }
328         else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined (PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined (PETSC_USE_CTABLE)
349 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
355               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col =  in[j];
357             }
358           }
359           else col = in[j];
360           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
361         }
362       }
363     } else {
364       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
365       if (!baij->donotstash) {
366         if (roworiented) {
367           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         } else {
369           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         }
371       }
372     }
373   }
374   PetscFunctionReturn(0);
375 }
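/*
   A hedged usage sketch (not compiled): MatSetValuesBlocked() takes block row/column
   indices and bs*bs values per block; with the default row-oriented storage the block
   is laid out by rows.  The bs = 2 block below is hypothetical.
*/
#if 0
  PetscInt    brow = 0, bcol = 0;
  PetscScalar vblock[4] = {1.0, 2.0,      /* first row of the 2x2 block  */
                           3.0, 4.0};     /* second row of the 2x2 block */
  ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vblock,INSERT_VALUES);CHKERRQ(ierr);
#endif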
376 
377 #define HASH_KEY 0.6180339887
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
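/*
   A worked example of the multiplicative hash above (hypothetical numbers): for key = 10
   and size = 7, tmp = 10*0.6180339887 = 6.1803..., its fractional part is 0.1803..., and
   HASH(7,10,tmp) = (PetscInt)(7*0.1803...) = 1.  Collisions are resolved by the
   linear-probing loops in the MatSetValues_*_HT() routines below.
*/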
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384 {
385   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386   PetscBool      roworiented = baij->roworiented;
387   PetscErrorCode ierr;
388   PetscInt       i,j,row,col;
389   PetscInt       rstart_orig=mat->rmap->rstart;
390   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392   PetscReal      tmp;
393   MatScalar      **HD = baij->hd,value;
394 #if defined(PETSC_USE_DEBUG)
395   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396 #endif
397 
398   PetscFunctionBegin;
399   if (v) PetscValidScalarPointer(v,6);
400   for (i=0; i<m; i++) {
401 #if defined(PETSC_USE_DEBUG)
402     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404 #endif
405     row = im[i];
406     if (row >= rstart_orig && row < rend_orig) {
407       for (j=0; j<n; j++) {
408         col = in[j];
409         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410         /* Look up into the hash table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423             if (idx == h1) {
424               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425             }
426           }
427         }
428 #else
429         if (HT[idx] != key) {
430           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431           if (idx == size) {
432             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433             if (idx == h1) {
434               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435             }
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscBool         roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489 #endif
490     row   = im[i];
491     v_t   = v + i*nbs2;
492     if (row >= rstart && row < rend) {
493       for (j=0; j<n; j++) {
494         col = in[j];
495 
496         /* Look up into the Hash Table */
497         key = row*Nbs+col+1;
498         h1  = HASH(size,key,tmp);
499 
500         idx = h1;
501 #if defined(PETSC_USE_DEBUG)
502         total_ct++;
503         insert_ct++;
504         if (HT[idx] != key) {
505           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506           if (idx == size) {
507             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508             if (idx == h1) {
509               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510             }
511           }
512         }
513 #else
514         if (HT[idx] != key) {
515           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516           if (idx == size) {
517             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518             if (idx == h1) {
519               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520             }
521           }
522         }
523 #endif
524         baij_a = HD[idx];
525         if (roworiented) {
526           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527           /* value = v + (i*(stepval+bs)+j)*bs; */
528           value = v_t;
529           v_t  += bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval) {
532               for (jj=ii; jj<bs2; jj+=bs) {
533                 baij_a[jj]  += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval) {
538               for (jj=ii; jj<bs2; jj+=bs) {
539                 baij_a[jj]  = *value++;
540               }
541             }
542           }
543         } else {
544           value = v + j*(stepval+bs)*bs + i*bs;
545           if (addv == ADD_VALUES) {
546             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547               for (jj=0; jj<bs; jj++) {
548                 baij_a[jj]  += *value++;
549               }
550             }
551           } else {
552             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553               for (jj=0; jj<bs; jj++) {
554                 baij_a[jj]  = *value++;
555               }
556             }
557           }
558         }
559       }
560     } else {
561       if (!baij->donotstash) {
562         if (roworiented) {
563           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
564         } else {
565           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570 #if defined(PETSC_USE_DEBUG)
571   baij->ht_total_ct = total_ct;
572   baij->ht_insert_ct = insert_ct;
573 #endif
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIBAIJ"
579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591       row = idxm[i] - bsrstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= bscstart && idxn[j] < bscend){
596           col = idxn[j] - bscstart;
597           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!baij->colmap) {
600             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
604           data --;
605 #else
606           data = baij->colmap[idxn[j]/bs]-1;
607 #endif
608           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609           else {
610             col  = data + idxn[j]%bs;
611             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612           }
613         }
614       }
615     } else {
616       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
617     }
618   }
619  PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatNorm_MPIBAIJ"
624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625 {
626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
628   PetscErrorCode ierr;
629   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630   PetscReal      sum = 0.0;
631   MatScalar      *v;
632 
633   PetscFunctionBegin;
634   if (baij->size == 1) {
635     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
636   } else {
637     if (type == NORM_FROBENIUS) {
638       v = amat->a;
639       nz = amat->nz*bs2;
640       for (i=0; i<nz; i++) {
641 #if defined(PETSC_USE_COMPLEX)
642         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650 #if defined(PETSC_USE_COMPLEX)
651         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652 #else
653         sum += (*v)*(*v); v++;
654 #endif
655       }
656       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
657       *nrm = sqrt(*nrm);
658     } else if (type == NORM_1) { /* max column sum */
659       PetscReal *tmp,*tmp2;
660       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
662       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
663       v = amat->a; jj = amat->j;
664       for (i=0; i<amat->nz; i++) {
665         for (j=0; j<bs; j++){
666           col = bs*(cstart + *jj) + j; /* column index */
667           for (row=0; row<bs; row++){
668             tmp[col] += PetscAbsScalar(*v);  v++;
669           }
670         }
671         jj++;
672       }
673       v = bmat->a; jj = bmat->j;
674       for (i=0; i<bmat->nz; i++) {
675         for (j=0; j<bs; j++){
676           col = bs*garray[*jj] + j;
677           for (row=0; row<bs; row++){
678             tmp[col] += PetscAbsScalar(*v); v++;
679           }
680         }
681         jj++;
682       }
683       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
684       *nrm = 0.0;
685       for (j=0; j<mat->cmap->N; j++) {
686         if (tmp2[j] > *nrm) *nrm = tmp2[j];
687       }
688       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
689     } else if (type == NORM_INFINITY) { /* max row sum */
690       PetscReal *sums;
691       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
692       sum = 0.0;
693       for (j=0; j<amat->mbs; j++) {
694         for (row=0; row<bs; row++) sums[row] = 0.0;
695         v = amat->a + bs2*amat->i[j];
696         nz = amat->i[j+1]-amat->i[j];
697         for (i=0; i<nz; i++) {
698           for (col=0; col<bs; col++){
699             for (row=0; row<bs; row++){
700               sums[row] += PetscAbsScalar(*v); v++;
701             }
702           }
703         }
704         v = bmat->a + bs2*bmat->i[j];
705         nz = bmat->i[j+1]-bmat->i[j];
706         for (i=0; i<nz; i++) {
707           for (col=0; col<bs; col++){
708             for (row=0; row<bs; row++){
709               sums[row] += PetscAbsScalar(*v); v++;
710             }
711           }
712         }
713         for (row=0; row<bs; row++){
714           if (sums[row] > sum) sum = sums[row];
715         }
716       }
717       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
718       ierr = PetscFree(sums);CHKERRQ(ierr);
719     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
720   }
721   PetscFunctionReturn(0);
722 }
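/*
   A hedged usage sketch (not compiled): the three supported norms are requested through
   the public MatNorm() interface; the variable names are hypothetical.
*/
#if 0
  PetscReal fnorm, onenorm, infnorm;
  ierr = MatNorm(mat,NORM_FROBENIUS,&fnorm);CHKERRQ(ierr);
  ierr = MatNorm(mat,NORM_1,&onenorm);CHKERRQ(ierr);
  ierr = MatNorm(mat,NORM_INFINITY,&infnorm);CHKERRQ(ierr);
#endif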
723 
724 /*
725   Creates and fills the hash table.
726   This table is created only once.
727   If new entries need to be added to the matrix
728   then the hash table has to be destroyed and
729   recreated.
730 */
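/*
   A hedged usage sketch (not compiled): the hash-table insertion path is normally
   requested by the user, e.g. with MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE)
   (or the -mat_use_hash_table option) and, optionally, a load factor set through
   MatMPIBAIJSetHashTableFactor(); the routine below is then called at the end of the
   first final assembly.  The factor value below is hypothetical.
*/
#if 0
  ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetHashTableFactor(mat,1.99);CHKERRQ(ierr);
#endif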
731 #undef __FUNCT__
732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
734 {
735   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
736   Mat            A = baij->A,B=baij->B;
737   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
738   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
739   PetscErrorCode ierr;
740   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
741   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
742   PetscInt       *HT,key;
743   MatScalar      **HD;
744   PetscReal      tmp;
745 #if defined(PETSC_USE_INFO)
746   PetscInt       ct=0,max=0;
747 #endif
748 
749   PetscFunctionBegin;
750   if (baij->ht) PetscFunctionReturn(0);
751 
752   baij->ht_size = (PetscInt)(factor*nz);
753   ht_size       = baij->ht_size;
754 
755   /* Allocate Memory for Hash Table */
756   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
757   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
758   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
759   HD   = baij->hd;
760   HT   = baij->ht;
761 
762   /* Loop Over A */
763   for (i=0; i<a->mbs; i++) {
764     for (j=ai[i]; j<ai[i+1]; j++) {
765       row = i+rstart;
766       col = aj[j]+cstart;
767 
768       key = row*Nbs + col + 1;
769       h1  = HASH(ht_size,key,tmp);
770       for (k=0; k<ht_size; k++){
771         if (!HT[(h1+k)%ht_size]) {
772           HT[(h1+k)%ht_size] = key;
773           HD[(h1+k)%ht_size] = a->a + j*bs2;
774           break;
775 #if defined(PETSC_USE_INFO)
776         } else {
777           ct++;
778 #endif
779         }
780       }
781 #if defined(PETSC_USE_INFO)
782       if (k> max) max = k;
783 #endif
784     }
785   }
786   /* Loop Over B */
787   for (i=0; i<b->mbs; i++) {
788     for (j=bi[i]; j<bi[i+1]; j++) {
789       row = i+rstart;
790       col = garray[bj[j]];
791       key = row*Nbs + col + 1;
792       h1  = HASH(ht_size,key,tmp);
793       for (k=0; k<ht_size; k++){
794         if (!HT[(h1+k)%ht_size]) {
795           HT[(h1+k)%ht_size] = key;
796           HD[(h1+k)%ht_size] = b->a + j*bs2;
797           break;
798 #if defined(PETSC_USE_INFO)
799         } else {
800           ct++;
801 #endif
802         }
803       }
804 #if defined(PETSC_USE_INFO)
805       if (k> max) max = k;
806 #endif
807     }
808   }
809 
810   /* Print Summary */
811 #if defined(PETSC_USE_INFO)
812   for (i=0,j=0; i<ht_size; i++) {
813     if (HT[i]) {j++;}
814   }
815   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
816 #endif
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
823 {
824   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
825   PetscErrorCode ierr;
826   PetscInt       nstash,reallocs;
827   InsertMode     addv;
828 
829   PetscFunctionBegin;
830   if (baij->donotstash || mat->nooffprocentries) {
831     PetscFunctionReturn(0);
832   }
833 
834   /* make sure all processors are either in INSERTMODE or ADDMODE */
835   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
836   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
837   mat->insertmode = addv; /* in case this processor had no cache */
838 
839   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
840   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
841   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
842   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
843   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
844   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
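/*
   MatAssemblyEnd_MPIBAIJ() below drains the point stash and the block stash whose
   scatters were started in MatAssemblyBegin(), assembles the local A and B parts,
   checks whether any process disassembled (and if so disassembles too so that everyone
   can reassemble), sets up the scatter used by MatMult() on the first final assembly,
   and optionally builds the hash table used by the _HT insertion routines.
*/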
848 #undef __FUNCT__
849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
851 {
852   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
853   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
854   PetscErrorCode ierr;
855   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
856   PetscInt       *row,*col;
857   PetscBool      r1,r2,r3,other_disassembled;
858   MatScalar      *val;
859   InsertMode     addv = mat->insertmode;
860   PetscMPIInt    n;
861 
862   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
863   PetscFunctionBegin;
864   if (!baij->donotstash && !mat->nooffprocentries) {
865     while (1) {
866       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
867       if (!flg) break;
868 
869       for (i=0; i<n;) {
870         /* Now identify the consecutive vals belonging to the same row */
871         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
872         if (j < n) ncols = j-i;
873         else       ncols = n-i;
874         /* Now assemble all these values with a single function call */
875         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
876         i = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
880     /* Now process the block-stash. Since the values are stashed column-oriented,
881        clear the roworiented flags before calling MatSetValuesBlocked() and
882        restore the original flags afterwards */
883     r1 = baij->roworiented;
884     r2 = a->roworiented;
885     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
886     baij->roworiented = PETSC_FALSE;
887     a->roworiented    = PETSC_FALSE;
888     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
889     while (1) {
890       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
891       if (!flg) break;
892 
893       for (i=0; i<n;) {
894         /* Now identify the consecutive vals belonging to the same row */
895         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
896         if (j < n) ncols = j-i;
897         else       ncols = n-i;
898         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
899         i = j;
900       }
901     }
902     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
903     baij->roworiented = r1;
904     a->roworiented    = r2;
905     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
906   }
907 
908   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
909   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
910 
911   /* determine if any processor has disassembled, if so we must
912      also disassemble ourselves, in order that we may reassemble. */
913   /*
914      if the nonzero structure of submatrix B cannot change then we know that
915      no processor disassembled, thus we can skip this step
916   */
917   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
918     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
919     if (mat->was_assembled && !other_disassembled) {
920       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
921     }
922   }
923 
924   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
925     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
926   }
927   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
928   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
930 
931 #if defined(PETSC_USE_INFO)
932   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
933     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
934     baij->ht_total_ct  = 0;
935     baij->ht_insert_ct = 0;
936   }
937 #endif
938   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
939     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
940     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
941     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
942   }
943 
944   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
945   baij->rowvalues = 0;
946   PetscFunctionReturn(0);
947 }
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
952 {
953   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
954   PetscErrorCode    ierr;
955   PetscMPIInt       size = baij->size,rank = baij->rank;
956   PetscInt          bs = mat->rmap->bs;
957   PetscBool         iascii,isdraw;
958   PetscViewer       sviewer;
959   PetscViewerFormat format;
960 
961   PetscFunctionBegin;
962   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
963   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
964   if (iascii) {
965     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
966     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
967       MatInfo info;
968       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
969       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
970       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
972              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
973       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
974       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
979       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
980       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
981       PetscFunctionReturn(0);
982     } else if (format == PETSC_VIEWER_ASCII_INFO) {
983       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
984       PetscFunctionReturn(0);
985     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
986       PetscFunctionReturn(0);
987     }
988   }
989 
990   if (isdraw) {
991     PetscDraw       draw;
992     PetscBool  isnull;
993     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
994     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
995   }
996 
997   if (size == 1) {
998     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
999     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1000   } else {
1001     /* assemble the entire matrix onto first processor. */
1002     Mat         A;
1003     Mat_SeqBAIJ *Aloc;
1004     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1005     MatScalar   *a;
1006 
1007     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1008     /* Perhaps this should be the type of mat? */
1009     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1010     if (!rank) {
1011       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1012     } else {
1013       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1014     }
1015     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1016     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1017     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1018 
1019     /* copy over the A part */
1020     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1021     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1022     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1023 
1024     for (i=0; i<mbs; i++) {
1025       rvals[0] = bs*(baij->rstartbs + i);
1026       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1027       for (j=ai[i]; j<ai[i+1]; j++) {
1028         col = (baij->cstartbs+aj[j])*bs;
1029         for (k=0; k<bs; k++) {
1030           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1031           col++; a += bs;
1032         }
1033       }
1034     }
1035     /* copy over the B part */
1036     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1037     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1038     for (i=0; i<mbs; i++) {
1039       rvals[0] = bs*(baij->rstartbs + i);
1040       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1041       for (j=ai[i]; j<ai[i+1]; j++) {
1042         col = baij->garray[aj[j]]*bs;
1043         for (k=0; k<bs; k++) {
1044           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1045           col++; a += bs;
1046         }
1047       }
1048     }
1049     ierr = PetscFree(rvals);CHKERRQ(ierr);
1050     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1051     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     /*
1053        Everyone has to participate in the call that draws the matrix since the graphics waits are
1054        synchronized across all processors that share the PetscDraw object
1055     */
1056     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1057     if (!rank) {
1058       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1059     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1060       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1061       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1062     }
1063     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1064     ierr = MatDestroy(A);CHKERRQ(ierr);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
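/*
   MatView_MPIBAIJ_Binary() below writes the matrix in the standard PETSc binary format,
   expanded to point (non-blocked) rows: a 4-entry header {MAT_FILE_CLASSID, M, N, total
   number of nonzeros}, then the length of every row, then the column indices of every
   row, then the numerical values, with each piece gathered to process 0 under flow
   control.
*/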
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1071 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1072 {
1073   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1074   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1075   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1076   PetscErrorCode ierr;
1077   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1078   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1079   int            fd;
1080   PetscScalar    *column_values;
1081   FILE           *file;
1082   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1083   PetscInt       message_count,flowcontrolcount;
1084 
1085   PetscFunctionBegin;
1086   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1087   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1088   nz   = bs2*(A->nz + B->nz);
1089   rlen = mat->rmap->n;
1090   if (!rank) {
1091     header[0] = MAT_FILE_CLASSID;
1092     header[1] = mat->rmap->N;
1093     header[2] = mat->cmap->N;
1094     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1095     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1096     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1097     /* get largest number of rows any processor has */
1098     range = mat->rmap->range;
1099     for (i=1; i<size; i++) {
1100       rlen = PetscMax(rlen,range[i+1] - range[i]);
1101     }
1102   } else {
1103     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1104   }
1105 
1106   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1107   /* compute lengths of each row  */
1108   for (i=0; i<a->mbs; i++) {
1109     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1110   }
1111   /* store the row lengths to the file */
1112   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1113   if (!rank) {
1114     MPI_Status status;
1115     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1116     rlen  = (range[1] - range[0])/bs;
1117     for (i=0; i<rlen; i++) {
1118       for (j=0; j<bs; j++) {
1119         row_lens[i*bs+j] = bs*crow_lens[i];
1120       }
1121     }
1122     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1123     for (i=1; i<size; i++) {
1124       rlen = (range[i+1] - range[i])/bs;
1125       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1126       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1127       for (k=0; k<rlen; k++) {
1128 	for (j=0; j<bs; j++) {
1129 	  row_lens[k*bs+j] = bs*crow_lens[k];
1130 	}
1131       }
1132       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1133     }
1134     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1135     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1136   } else {
1137     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1138     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1139     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1140   }
1141   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1142 
1143   /* load up the local column indices. Include them for every row, not just one set per block row, since process 0 does not have
1144      the information needed to expand a block row into its individual rows. This does require more communication, but still no
1145      more than the communication needed for the nonzero values  */
1146   nzmax = nz; /* space the largest processor needs */
1147   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1148   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1149   cnt  = 0;
1150   for (i=0; i<a->mbs; i++) {
1151     pcnt = cnt;
1152     for (j=B->i[i]; j<B->i[i+1]; j++) {
1153       if ( (col = garray[B->j[j]]) > cstart) break;
1154       for (l=0; l<bs; l++) {
1155 	column_indices[cnt++] = bs*col+l;
1156       }
1157     }
1158     for (k=A->i[i]; k<A->i[i+1]; k++) {
1159       for (l=0; l<bs; l++) {
1160         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1161       }
1162     }
1163     for (; j<B->i[i+1]; j++) {
1164       for (l=0; l<bs; l++) {
1165         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1166       }
1167     }
1168     len = cnt - pcnt;
1169     for (k=1; k<bs; k++) {
1170       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1171       cnt += len;
1172     }
1173   }
1174   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1175 
1176   /* store the columns to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     MPI_Status status;
1180     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1181     for (i=1; i<size; i++) {
1182       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1183       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1184       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     }
1187     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1188   } else {
1189     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1190     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1191     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1192     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1193   }
1194   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1195 
1196   /* load up the numerical values */
1197   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1198   cnt = 0;
1199   for (i=0; i<a->mbs; i++) {
1200     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1201     for (j=B->i[i]; j<B->i[i+1]; j++) {
1202       if ( garray[B->j[j]] > cstart) break;
1203       for (l=0; l<bs; l++) {
1204         for (ll=0; ll<bs; ll++) {
1205 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1206         }
1207       }
1208       cnt += bs;
1209     }
1210     for (k=A->i[i]; k<A->i[i+1]; k++) {
1211       for (l=0; l<bs; l++) {
1212         for (ll=0; ll<bs; ll++) {
1213           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1214         }
1215       }
1216       cnt += bs;
1217     }
1218     for (; j<B->i[i+1]; j++) {
1219       for (l=0; l<bs; l++) {
1220         for (ll=0; ll<bs; ll++) {
1221 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1222         }
1223       }
1224       cnt += bs;
1225     }
1226     cnt += (bs-1)*rlen;
1227   }
1228   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1229 
1230   /* store the column values to the file */
1231   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1232   if (!rank) {
1233     MPI_Status status;
1234     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1235     for (i=1; i<size; i++) {
1236       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1237       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1238       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1239       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1240     }
1241     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1242   } else {
1243     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1244     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1245     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1246     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1247   }
1248   ierr = PetscFree(column_values);CHKERRQ(ierr);
1249 
1250   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1251   if (file) {
1252     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatView_MPIBAIJ"
1259 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1260 {
1261   PetscErrorCode ierr;
1262   PetscBool      iascii,isdraw,issocket,isbinary;
1263 
1264   PetscFunctionBegin;
1265   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1266   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1269   if (iascii || isdraw || issocket) {
1270     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1271   } else if (isbinary) {
1272     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1273   } else {
1274     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1281 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1282 {
1283   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1284   PetscErrorCode ierr;
1285 
1286   PetscFunctionBegin;
1287 #if defined(PETSC_USE_LOG)
1288   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1289 #endif
1290   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1291   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1292   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1293   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1294 #if defined (PETSC_USE_CTABLE)
1295   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1296 #else
1297   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1298 #endif
1299   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1300   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1301   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1302   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1303   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1304   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1305   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1306   ierr = PetscFree(baij);CHKERRQ(ierr);
1307 
1308   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
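/*
   Note: MatMult below computes y = A x. The forward scatter of the needed ghost values of xx
   into a->lvec is started first and overlapped with the multiply by the local diagonal block
   a->A; the off-diagonal block a->B then adds its contribution using the received ghost values.
*/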
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatMult_MPIBAIJ"
1321 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1322 {
1323   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325   PetscInt       nt;
1326 
1327   PetscFunctionBegin;
1328   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1329   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1330   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1331   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1332   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1334   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1341 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1342 {
1343   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1348   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1349   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1350   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1356 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1357 {
1358   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360   PetscBool      merged;
1361 
1362   PetscFunctionBegin;
1363   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1364   /* do nondiagonal part */
1365   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1366   if (!merged) {
1367     /* send it on its way */
1368     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1369     /* do local part */
1370     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1371     /* receive remote parts: note this assumes the values are not actually */
1372     /* inserted in yy until the next line */
1373     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1374   } else {
1375     /* do local part */
1376     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1377     /* send it on its way */
1378     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1379     /* values actually were received in the Begin() but we need to call this nop */
1380     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1387 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1388 {
1389   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1390   PetscErrorCode ierr;
1391 
1392   PetscFunctionBegin;
1393   /* do nondiagonal part */
1394   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1395   /* send it on its way */
1396   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1397   /* do local part */
1398   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1399   /* receive remote parts: note this assumes the values are not actually */
1400   /* inserted in zz until the next line, which is true for this implementation */
1401   /* but may not always be true. */
1402   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /*
1407   This only works correctly for square matrices where the subblock A->A is the
1408    diagonal block
1409 */
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1412 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1413 {
1414   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1419   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatScale_MPIBAIJ"
1425 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1426 {
1427   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1428   PetscErrorCode ierr;
1429 
1430   PetscFunctionBegin;
1431   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1432   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
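/*
   Note: MatGetRow below returns one locally owned row with global column indices. The row is
   assembled by getting the corresponding rows of the diagonal (A) and off-diagonal (B) blocks,
   mapping the compressed off-diagonal block-column indices back to global indices via garray,
   and merging the two pieces in increasing column order into the rowvalues/rowindices work arrays.
*/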
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1438 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1439 {
1440   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1441   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1442   PetscErrorCode ierr;
1443   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1444   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1445   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1446 
1447   PetscFunctionBegin;
1448   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1449   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1450   mat->getrowactive = PETSC_TRUE;
1451 
1452   if (!mat->rowvalues && (idx || v)) {
1453     /*
1454         allocate enough space to hold information from the longest row.
1455     */
1456     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1457     PetscInt     max = 1,mbs = mat->mbs,tmp;
1458     for (i=0; i<mbs; i++) {
1459       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1460       if (max < tmp) { max = tmp; }
1461     }
1462     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1463   }
1464   lrow = row - brstart;
1465 
1466   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1467   if (!v)   {pvA = 0; pvB = 0;}
1468   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1469   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1470   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1471   nztot = nzA + nzB;
1472 
1473   cmap  = mat->garray;
1474   if (v  || idx) {
1475     if (nztot) {
1476       /* Sort by increasing column numbers, assuming A and B already sorted */
1477       PetscInt imark = -1;
1478       if (v) {
1479         *v = v_p = mat->rowvalues;
1480         for (i=0; i<nzB; i++) {
1481           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1482           else break;
1483         }
1484         imark = i;
1485         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1486         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1487       }
1488       if (idx) {
1489         *idx = idx_p = mat->rowindices;
1490         if (imark > -1) {
1491           for (i=0; i<imark; i++) {
1492             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1493           }
1494         } else {
1495           for (i=0; i<nzB; i++) {
1496             if (cmap[cworkB[i]/bs] < cstart)
1497               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1498             else break;
1499           }
1500           imark = i;
1501         }
1502         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1503         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1504       }
1505     } else {
1506       if (idx) *idx = 0;
1507       if (v)   *v   = 0;
1508     }
1509   }
1510   *nz = nztot;
1511   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1512   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1518 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1519 {
1520   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1521 
1522   PetscFunctionBegin;
1523   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1524   baij->getrowactive = PETSC_FALSE;
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1530 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1531 {
1532   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1537   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
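/*
   Note: MatGetInfo below obtains the local information from the diagonal and off-diagonal blocks
   and sums it; for MAT_GLOBAL_MAX or MAT_GLOBAL_SUM the five quantities are then reduced across
   the communicator with MPI_Allreduce.
*/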
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1543 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1544 {
1545   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1546   Mat            A = a->A,B = a->B;
1547   PetscErrorCode ierr;
1548   PetscReal      isend[5],irecv[5];
1549 
1550   PetscFunctionBegin;
1551   info->block_size     = (PetscReal)matin->rmap->bs;
1552   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1553   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1554   isend[3] = info->memory;  isend[4] = info->mallocs;
1555   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1556   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1557   isend[3] += info->memory;  isend[4] += info->mallocs;
1558   if (flag == MAT_LOCAL) {
1559     info->nz_used      = isend[0];
1560     info->nz_allocated = isend[1];
1561     info->nz_unneeded  = isend[2];
1562     info->memory       = isend[3];
1563     info->mallocs      = isend[4];
1564   } else if (flag == MAT_GLOBAL_MAX) {
1565     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1566     info->nz_used      = irecv[0];
1567     info->nz_allocated = irecv[1];
1568     info->nz_unneeded  = irecv[2];
1569     info->memory       = irecv[3];
1570     info->mallocs      = irecv[4];
1571   } else if (flag == MAT_GLOBAL_SUM) {
1572     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1573     info->nz_used      = irecv[0];
1574     info->nz_allocated = irecv[1];
1575     info->nz_unneeded  = irecv[2];
1576     info->memory       = irecv[3];
1577     info->mallocs      = irecv[4];
1578   } else {
1579     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1580   }
1581   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1582   info->fill_ratio_needed = 0;
1583   info->factor_mallocs    = 0;
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1589 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1590 {
1591   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   switch (op) {
1596   case MAT_NEW_NONZERO_LOCATIONS:
1597   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1598   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1599   case MAT_KEEP_NONZERO_PATTERN:
1600   case MAT_NEW_NONZERO_LOCATION_ERR:
1601     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1602     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1603     break;
1604   case MAT_ROW_ORIENTED:
1605     a->roworiented = flg;
1606     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1607     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1608     break;
1609   case MAT_NEW_DIAGONALS:
1610     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1611     break;
1612   case MAT_IGNORE_OFF_PROC_ENTRIES:
1613     a->donotstash = flg;
1614     break;
1615   case MAT_USE_HASH_TABLE:
1616     a->ht_flag = flg;
1617     break;
1618   case MAT_SYMMETRIC:
1619   case MAT_STRUCTURALLY_SYMMETRIC:
1620   case MAT_HERMITIAN:
1621   case MAT_SYMMETRY_ETERNAL:
1622     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1623     break;
1624   default:
1625     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 
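/*
   Note: MatTranspose below forms B = A^T by looping over the block rows of the diagonal and
   off-diagonal parts and inserting each bs x bs block, one column at a time, with its row and
   column indices swapped; in-place transposition is only permitted for square matrices.
*/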
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1632 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1633 {
1634   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1635   Mat_SeqBAIJ    *Aloc;
1636   Mat            B;
1637   PetscErrorCode ierr;
1638   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1639   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1640   MatScalar      *a;
1641 
1642   PetscFunctionBegin;
1643   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1644   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1645     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1646     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1647     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1648     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1649   } else {
1650     B = *matout;
1651   }
1652 
1653   /* copy over the A part */
1654   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1655   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1656   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1657 
1658   for (i=0; i<mbs; i++) {
1659     rvals[0] = bs*(baij->rstartbs + i);
1660     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1661     for (j=ai[i]; j<ai[i+1]; j++) {
1662       col = (baij->cstartbs+aj[j])*bs;
1663       for (k=0; k<bs; k++) {
1664         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1665         col++; a += bs;
1666       }
1667     }
1668   }
1669   /* copy over the B part */
1670   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1671   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1672   for (i=0; i<mbs; i++) {
1673     rvals[0] = bs*(baij->rstartbs + i);
1674     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1675     for (j=ai[i]; j<ai[i+1]; j++) {
1676       col = baij->garray[aj[j]]*bs;
1677       for (k=0; k<bs; k++) {
1678         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1679         col++; a += bs;
1680       }
1681     }
1682   }
1683   ierr = PetscFree(rvals);CHKERRQ(ierr);
1684   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686 
1687   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1688     *matout = B;
1689   } else {
1690     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1691   }
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1697 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1698 {
1699   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1700   Mat            a = baij->A,b = baij->B;
1701   PetscErrorCode ierr;
1702   PetscInt       s1,s2,s3;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1706   if (rr) {
1707     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1708     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1709     /* Overlap communication with computation. */
1710     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1711   }
1712   if (ll) {
1713     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1714     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1715     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1716   }
1717   /* scale the diagonal block */
1718   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1719 
1720   if (rr) {
1721     /* Do a scatter end and then right scale the off-diagonal block */
1722     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1723     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1724   }
1725 
1726   PetscFunctionReturn(0);
1727 }
1728 
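/*
   Note: in MatZeroRows below each process first determines the owner of every row it wants
   zeroed, sends the off-process row indices to their owners, and receives the rows other
   processes want it to zero. The right-hand side is fixed up as b = diag*x for the zeroed
   local rows, and the rows are then zeroed in the off-diagonal block before the diagonal block
   (setting the diagonal in the rectangular case may insert new values into l->B).
*/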
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1731 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1732 {
1733   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1734   PetscErrorCode    ierr;
1735   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1736   PetscInt          i,*owners = A->rmap->range;
1737   PetscInt          *nprocs,j,idx,nsends,row;
1738   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1739   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1740   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1741   MPI_Comm          comm = ((PetscObject)A)->comm;
1742   MPI_Request       *send_waits,*recv_waits;
1743   MPI_Status        recv_status,*send_status;
1744   const PetscScalar *xx;
1745   PetscScalar       *bb;
1746 #if defined(PETSC_DEBUG)
1747   PetscBool         found = PETSC_FALSE;
1748 #endif
1749 
1750   PetscFunctionBegin;
1751   /*  first count number of contributors to each processor */
1752   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1753   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1755   j = 0;
1756   for (i=0; i<N; i++) {
1757     if (lastidx > (idx = rows[i])) j = 0;
1758     lastidx = idx;
1759     for (; j<size; j++) {
1760       if (idx >= owners[j] && idx < owners[j+1]) {
1761         nprocs[2*j]++;
1762         nprocs[2*j+1] = 1;
1763         owner[i] = j;
1764 #if defined(PETSC_DEBUG)
1765         found = PETSC_TRUE;
1766 #endif
1767         break;
1768       }
1769     }
1770 #if defined(PETSC_DEBUG)
1771     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1772     found = PETSC_FALSE;
1773 #endif
1774   }
1775   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1776 
1777   if (A->nooffproczerorows) {
1778     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1779     nrecvs = nsends;
1780     nmax   = N;
1781   } else {
1782     /* inform other processors of number of messages and max length*/
1783     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1784   }
1785 
1786   /* post receives:   */
1787   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1788   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1789   for (i=0; i<nrecvs; i++) {
1790     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1791   }
1792 
1793   /* do sends:
1794      1) starts[i] gives the starting index in svalues for stuff going to
1795      the ith processor
1796   */
1797   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1798   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1799   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1800   starts[0]  = 0;
1801   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1802   for (i=0; i<N; i++) {
1803     svalues[starts[owner[i]]++] = rows[i];
1804   }
1805 
1806   starts[0] = 0;
1807   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1808   count = 0;
1809   for (i=0; i<size; i++) {
1810     if (nprocs[2*i+1]) {
1811       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1812     }
1813   }
1814   ierr = PetscFree(starts);CHKERRQ(ierr);
1815 
1816   base = owners[rank];
1817 
1818   /*  wait on receives */
1819   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1820   count  = nrecvs;
1821   slen = 0;
1822   while (count) {
1823     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1824     /* unpack receives into our local space */
1825     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1826     source[imdex]  = recv_status.MPI_SOURCE;
1827     lens[imdex]    = n;
1828     slen          += n;
1829     count--;
1830   }
1831   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1832 
1833   /* move the data into the send scatter */
1834   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1835   count = 0;
1836   for (i=0; i<nrecvs; i++) {
1837     values = rvalues + i*nmax;
1838     for (j=0; j<lens[i]; j++) {
1839       lrows[count++] = values[j] - base;
1840     }
1841   }
1842   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1843   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1844   ierr = PetscFree(owner);CHKERRQ(ierr);
1845   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1846 
1847   /* fix right hand side if needed */
1848   if (x && b) {
1849     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1850     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1851     for (i=0; i<slen; i++) {
1852       bb[lrows[i]] = diag*xx[lrows[i]];
1853     }
1854     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1856   }
1857 
1858   /* actually zap the local rows */
1859   /*
1860         Zero the required rows. If the "diagonal block" of the matrix
1861      is square and the user wishes to set the diagonal we use separate
1862      code so that MatSetValues() is not called for each diagonal entry, which would
1863      allocate new memory, triggering many mallocs and slowing things down.
1864 
1865   */
1866   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1867   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1868   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1869     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1870   } else if (diag != 0.0) {
1871     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1872     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1873        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1874     for (i=0; i<slen; i++) {
1875       row  = lrows[i] + rstart_bs;
1876       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1877     }
1878     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880   } else {
1881     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1882   }
1883 
1884   ierr = PetscFree(lrows);CHKERRQ(ierr);
1885 
1886   /* wait on sends */
1887   if (nsends) {
1888     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1889     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1890     ierr = PetscFree(send_status);CHKERRQ(ierr);
1891   }
1892   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1893   ierr = PetscFree(svalues);CHKERRQ(ierr);
1894 
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1900 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1901 {
1902   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatEqual_MPIBAIJ"
1914 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1915 {
1916   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1917   Mat            a,b,c,d;
1918   PetscBool      flg;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   a = matA->A; b = matA->B;
1923   c = matB->A; d = matB->B;
1924 
1925   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1926   if (flg) {
1927     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1928   }
1929   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatCopy_MPIBAIJ"
1935 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1936 {
1937   PetscErrorCode ierr;
1938   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1939   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1940 
1941   PetscFunctionBegin;
1942   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1943   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1944     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1945   } else {
1946     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1947     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1954 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1955 {
1956   PetscErrorCode ierr;
1957 
1958   PetscFunctionBegin;
1959   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
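/*
   Note: MatAXPY below computes Y = Y + a*X. When the two matrices share the same nonzero
   pattern, the stored values of the diagonal and off-diagonal blocks are combined directly
   with BLAS axpy; otherwise the generic MatAXPY_Basic() path is used.
*/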
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1965 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1966 {
1967   PetscErrorCode ierr;
1968   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1969   PetscBLASInt   bnz,one=1;
1970   Mat_SeqBAIJ    *x,*y;
1971 
1972   PetscFunctionBegin;
1973   if (str == SAME_NONZERO_PATTERN) {
1974     PetscScalar alpha = a;
1975     x = (Mat_SeqBAIJ *)xx->A->data;
1976     y = (Mat_SeqBAIJ *)yy->A->data;
1977     bnz = PetscBLASIntCast(x->nz);
1978     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1979     x = (Mat_SeqBAIJ *)xx->B->data;
1980     y = (Mat_SeqBAIJ *)yy->B->data;
1981     bnz = PetscBLASIntCast(x->nz);
1982     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1983   } else {
1984     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1985   }
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1991 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1992 {
1993   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1994   PetscInt rbs,cbs;
1995   PetscErrorCode ierr;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1999   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2000   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2001   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2002   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D with BAIJ %D",bs,rbs);
2003   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D with BAIJ %D",bs,cbs);
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2009 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2010 {
2011   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2016   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2022 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2023 {
2024   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2025   PetscErrorCode ierr;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2029   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2035 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2036 {
2037   PetscErrorCode ierr;
2038   IS             iscol_local;
2039   PetscInt       csize;
2040 
2041   PetscFunctionBegin;
2042   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2043   if (call == MAT_REUSE_MATRIX) {
2044     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2045     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2046   } else {
2047     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2048   }
2049   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2050   if (call == MAT_INITIAL_MATRIX) {
2051     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2052     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2053   }
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2059 /*
2060     Not ideal since it makes two copies of the submatrix: first a SeqBAIJ on each
2061   process, and then the end result is obtained by concatenating the local matrices.
2062   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2063 */
2064 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2065 {
2066   PetscErrorCode ierr;
2067   PetscMPIInt    rank,size;
2068   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2069   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2070   Mat            *local,M,Mreuse;
2071   MatScalar      *vwork,*aa;
2072   MPI_Comm       comm = ((PetscObject)mat)->comm;
2073   Mat_SeqBAIJ    *aij;
2074 
2075 
2076   PetscFunctionBegin;
2077   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2078   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2079 
2080   if (call ==  MAT_REUSE_MATRIX) {
2081     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2082     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2083     local = &Mreuse;
2084     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2085   } else {
2086     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2087     Mreuse = *local;
2088     ierr   = PetscFree(local);CHKERRQ(ierr);
2089   }
2090 
2091   /*
2092       m - number of local rows
2093       n - number of columns (same on all processors)
2094       rstart - first row in new global matrix generated
2095   */
2096   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2097   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2098   m    = m/bs;
2099   n    = n/bs;
2100 
2101   if (call == MAT_INITIAL_MATRIX) {
2102     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2103     ii  = aij->i;
2104     jj  = aij->j;
2105 
2106     /*
2107         Determine the number of non-zeros in the diagonal and off-diagonal
2108         portions of the matrix in order to do correct preallocation
2109     */
2110 
2111     /* first get start and end of "diagonal" columns */
2112     if (csize == PETSC_DECIDE) {
2113       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2114       if (mglobal == n*bs) { /* square matrix */
2115         nlocal = m;
2116       } else {
2117         nlocal = n/size + ((n % size) > rank);
2118       }
2119     } else {
2120       nlocal = csize/bs;
2121     }
2122     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2123     rstart = rend - nlocal;
2124     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2125 
2126     /* next, compute all the lengths */
2127     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2128     olens = dlens + m;
2129     for (i=0; i<m; i++) {
2130       jend = ii[i+1] - ii[i];
2131       olen = 0;
2132       dlen = 0;
2133       for (j=0; j<jend; j++) {
2134         if (*jj < rstart || *jj >= rend) olen++;
2135         else dlen++;
2136         jj++;
2137       }
2138       olens[i] = olen;
2139       dlens[i] = dlen;
2140     }
2141     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2142     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2143     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2144     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2145     ierr = PetscFree(dlens);CHKERRQ(ierr);
2146   } else {
2147     PetscInt ml,nl;
2148 
2149     M = *newmat;
2150     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2151     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2152     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2153     /*
2154          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2155        rather than the slower MatSetValues().
2156     */
2157     M->was_assembled = PETSC_TRUE;
2158     M->assembled     = PETSC_FALSE;
2159   }
2160   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2161   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2162   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2163   ii  = aij->i;
2164   jj  = aij->j;
2165   aa  = aij->a;
2166   for (i=0; i<m; i++) {
2167     row   = rstart/bs + i;
2168     nz    = ii[i+1] - ii[i];
2169     cwork = jj;     jj += nz;
2170     vwork = aa;     aa += nz;
2171     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2172   }
2173 
2174   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2175   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2176   *newmat = M;
2177 
2178   /* save submatrix used in processor for next request */
2179   if (call ==  MAT_INITIAL_MATRIX) {
2180     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2181     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2182   }
2183 
2184   PetscFunctionReturn(0);
2185 }
2186 
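/*
   Note: in MatPermute below the row permutation is gathered onto all processes and inverted to
   find where each locally owned row should go; the column permutation is handled locally and
   inverted as well. The permuted matrix is then extracted as a submatrix with
   MatGetSubMatrix_MPIBAIJ_Private().
*/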
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatPermute_MPIBAIJ"
2189 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2190 {
2191   MPI_Comm       comm,pcomm;
2192   PetscInt       first,local_size,nrows;
2193   const PetscInt *rows;
2194   PetscMPIInt    size;
2195   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2196   PetscErrorCode ierr;
2197 
2198   PetscFunctionBegin;
2199   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2200   /* make a collective version of 'rowp' */
2201   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2202   if (pcomm==comm) {
2203     crowp = rowp;
2204   } else {
2205     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2206     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2207     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2208     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2209   }
2210   /* collect the global row permutation and invert it */
2211   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2212   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2213   if (pcomm!=comm) {
2214     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2215   }
2216   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2217   /* get the local target indices */
2218   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2219   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2220   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2221   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2222   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2223   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2224   /* the column permutation is so much easier;
2225      make a local version of 'colp' and invert it */
2226   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2227   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2228   if (size==1) {
2229     lcolp = colp;
2230   } else {
2231     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2232     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2233     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2234   }
2235   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2236   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2237   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2238   if (size>1) {
2239     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2240     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2241   }
2242   /* now we just get the submatrix */
2243   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2244   /* clean up */
2245   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2246   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2252 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2253 {
2254   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2255   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2256 
2257   PetscFunctionBegin;
2258   if (nghosts) { *nghosts = B->nbs;}
2259   if (ghosts) {*ghosts = baij->garray;}
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2267 /*
2268     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2269 */
2270 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2271 {
2272   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2273   PetscErrorCode        ierr;
2274   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2275   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2276   const PetscInt        *is;
2277   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2278   PetscInt              *rowhit,M,cstart,cend,colb;
2279   PetscInt              *columnsforrow,l;
2280   IS                    *isa;
2281   PetscBool              done,flg;
2282   ISLocalToGlobalMapping map = mat->cbmapping;
2283   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2284 
2285   PetscFunctionBegin;
2286   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2287   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2288 
2289   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2290   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2291   M                = mat->rmap->n/bs;
2292   cstart           = mat->cmap->rstart/bs;
2293   cend             = mat->cmap->rend/bs;
2294   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2295   c->N             = mat->cmap->N/bs;
2296   c->m             = mat->rmap->n/bs;
2297   c->rstart        = mat->rmap->rstart/bs;
2298 
2299   c->ncolors       = nis;
2300   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2301   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2302   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2303   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2305   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2306 
2307   /* Allow access to data structures of local part of matrix */
2308   if (!baij->colmap) {
2309     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2310   }
2311   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2312   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2313 
2314   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2315   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2316 
2317   for (i=0; i<nis; i++) {
2318     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2319     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2320     c->ncolumns[i] = n;
2321     if (n) {
2322       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2323       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2324       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2325     } else {
2326       c->columns[i]  = 0;
2327     }
2328 
2329     if (ctype == IS_COLORING_GLOBAL){
2330       /* Determine the total (parallel) number of columns of this color */
2331       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2332       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2333 
2334       nn   = PetscMPIIntCast(n);
2335       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2336       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2337       if (!nctot) {
2338         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2339       }
2340 
2341       disp[0] = 0;
2342       for (j=1; j<size; j++) {
2343         disp[j] = disp[j-1] + ncolsonproc[j-1];
2344       }
2345 
2346       /* Get complete list of columns for color on each processor */
2347       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2348       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2349       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2350     } else if (ctype == IS_COLORING_GHOSTED){
2351       /* Determine local number of columns of this color on this process, including ghost points */
2352       nctot = n;
2353       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2354       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2355     } else {
2356       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2357     }
2358 
2359     /*
2360        Mark all rows affected by these columns
2361     */
2362     /* Temporary option to allow for debugging/testing */
2363     flg  = PETSC_FALSE;
2364     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2365     if (!flg) {/*-----------------------------------------------------------------------------*/
2366       /* crude, fast version */
2367       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2368       /* loop over columns*/
2369       for (j=0; j<nctot; j++) {
2370         if (ctype == IS_COLORING_GHOSTED) {
2371           col = ltog[cols[j]];
2372         } else {
2373           col  = cols[j];
2374         }
2375         if (col >= cstart && col < cend) {
2376           /* column is in diagonal block of matrix */
2377           rows = A_cj + A_ci[col-cstart];
2378           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2379         } else {
2380 #if defined (PETSC_USE_CTABLE)
2381           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2382           colb--;
2383 #else
2384           colb = baij->colmap[col] - 1;
2385 #endif
2386           if (colb == -1) {
2387             m = 0;
2388           } else {
2389             colb = colb/bs;
2390             rows = B_cj + B_ci[colb];
2391             m    = B_ci[colb+1] - B_ci[colb];
2392           }
2393         }
2394         /* loop over columns marking them in rowhit */
2395         for (k=0; k<m; k++) {
2396           rowhit[*rows++] = col + 1;
2397         }
2398       }
2399 
2400       /* count the number of hits */
2401       nrows = 0;
2402       for (j=0; j<M; j++) {
2403         if (rowhit[j]) nrows++;
2404       }
2405       c->nrows[i]         = nrows;
2406       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2407       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2408       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2409       nrows = 0;
2410       for (j=0; j<M; j++) {
2411         if (rowhit[j]) {
2412           c->rows[i][nrows]           = j;
2413           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2414           nrows++;
2415         }
2416       }
2417     } else {/*-------------------------------------------------------------------------------*/
2418       /* slow version, using rowhit as a linked list */
2419       PetscInt currentcol,fm,mfm;
2420       rowhit[M] = M;
2421       nrows     = 0;
2422       /* loop over columns*/
2423       for (j=0; j<nctot; j++) {
2424         if (ctype == IS_COLORING_GHOSTED) {
2425           col = ltog[cols[j]];
2426         } else {
2427           col  = cols[j];
2428         }
2429         if (col >= cstart && col < cend) {
2430           /* column is in diagonal block of matrix */
2431           rows = A_cj + A_ci[col-cstart];
2432           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2433         } else {
2434 #if defined (PETSC_USE_CTABLE)
2435           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2436           colb --;
2437 #else
2438           colb = baij->colmap[col] - 1;
2439 #endif
2440           if (colb == -1) {
2441             m = 0;
2442           } else {
2443             colb = colb/bs;
2444             rows = B_cj + B_ci[colb];
2445             m    = B_ci[colb+1] - B_ci[colb];
2446           }
2447         }
2448 
2449         /* loop over columns marking them in rowhit */
2450         fm    = M; /* fm points to first entry in linked list */
2451         for (k=0; k<m; k++) {
2452           currentcol = *rows++;
2453           /* is it already in the list? */
2454           do {
2455             mfm  = fm;
2456             fm   = rowhit[fm];
2457           } while (fm < currentcol);
2458           /* not in list so add it */
2459           if (fm != currentcol) {
2460             nrows++;
2461             columnsforrow[currentcol] = col;
2462             /* next three lines insert new entry into linked list */
2463             rowhit[mfm]               = currentcol;
2464             rowhit[currentcol]        = fm;
2465             fm                        = currentcol;
2466             /* fm points to present position in list since we know the columns are sorted */
2467           } else {
2468             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2469           }
2470         }
2471       }
2472       c->nrows[i]         = nrows;
2473       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2474       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2475       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2476       /* now store the linked list of rows into c->rows[i] */
2477       nrows = 0;
2478       fm    = rowhit[M];
2479       do {
2480         c->rows[i][nrows]            = fm;
2481         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2482         fm                           = rowhit[fm];
2483       } while (fm < M);
2484     } /* ---------------------------------------------------------------------------------------*/
2485     ierr = PetscFree(cols);CHKERRQ(ierr);
2486   }
2487 
2488   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2489   /*
2490        vscale will contain the "diagonal" on-processor scalings followed by the off-processor scalings
2491   */
2492   if (ctype == IS_COLORING_GLOBAL) {
2493     PetscInt *garray;
2494     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2495     for (i=0; i<baij->B->cmap->n/bs; i++) {
2496       for (j=0; j<bs; j++) {
2497         garray[i*bs+j] = bs*baij->garray[i]+j;
2498       }
2499     }
2500     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2501     ierr = PetscFree(garray);CHKERRQ(ierr);
2502     CHKMEMQ;
2503     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2504     for (k=0; k<c->ncolors; k++) {
2505       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2506       for (l=0; l<c->nrows[k]; l++) {
2507         col = c->columnsforrow[k][l];
2508         if (col >= cstart && col < cend) {
2509           /* column is in diagonal block of matrix */
2510           colb = col - cstart;
2511         } else {
2512           /* column  is in "off-processor" part */
2513 #if defined (PETSC_USE_CTABLE)
2514           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2515           colb --;
2516 #else
2517           colb = baij->colmap[col] - 1;
2518 #endif
2519           colb = colb/bs;
2520           colb += cend - cstart;
2521         }
2522         c->vscaleforrow[k][l] = colb;
2523       }
2524     }
2525   } else if (ctype == IS_COLORING_GHOSTED) {
2526     /* Get gtol mapping */
2527     PetscInt N = mat->cmap->N, *gtol;
2528     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2529     for (i=0; i<N; i++) gtol[i] = -1;
2530     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2531 
2532     c->vscale = 0; /* will be created in MatFDColoringApply() */
2533     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2534     for (k=0; k<c->ncolors; k++) {
2535       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2536       for (l=0; l<c->nrows[k]; l++) {
2537         col = c->columnsforrow[k][l];      /* global column index */
2538         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2539       }
2540     }
2541     ierr = PetscFree(gtol);CHKERRQ(ierr);
2542   }
2543   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2544 
2545   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2546   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2547   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2548   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2549   CHKMEMQ;
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2555 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2556 {
2557   Mat            B;
2558   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2559   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2560   Mat_SeqAIJ     *b;
2561   PetscErrorCode ierr;
2562   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2563   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2564   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2565 
2566   PetscFunctionBegin;
2567   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2568   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2569 
2570   /* ----------------------------------------------------------------
2571      Tell every processor the number of nonzeros per row
2572   */
2573   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2574   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2575     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2576   }
2577   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2578   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2579   displs     = recvcounts + size;
2580   for (i=0; i<size; i++) {
2581     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2582     displs[i]     = A->rmap->range[i]/bs;
2583   }
2584 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2585   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2586 #else
2587   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2588 #endif
2589   /* ---------------------------------------------------------------
2590      Create the sequential matrix of the same type as the local block diagonal
2591   */
2592   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2593   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2594   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2595   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2596   b = (Mat_SeqAIJ *)B->data;
2597 
2598   /*--------------------------------------------------------------------
2599     Copy my part of matrix column indices over
2600   */
2601   sendcount  = ad->nz + bd->nz;
2602   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2603   a_jsendbuf = ad->j;
2604   b_jsendbuf = bd->j;
2605   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2606   cnt        = 0;
2607   for (i=0; i<n; i++) {
2608 
2609     /* put in lower diagonal portion */
2610     m = bd->i[i+1] - bd->i[i];
2611     while (m > 0) {
2612       /* is it above diagonal (in bd (compressed) numbering) */
2613       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2614       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2615       m--;
2616     }
2617 
2618     /* put in diagonal portion */
2619     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2620       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2621     }
2622 
2623     /* put in upper diagonal portion */
2624     while (m-- > 0) {
2625       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2626     }
2627   }
2628   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2629 
2630   /*--------------------------------------------------------------------
2631     Gather all column indices to all processors
2632   */
2633   for (i=0; i<size; i++) {
2634     recvcounts[i] = 0;
2635     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2636       recvcounts[i] += lens[j];
2637     }
2638   }
2639   displs[0]  = 0;
2640   for (i=1; i<size; i++) {
2641     displs[i] = displs[i-1] + recvcounts[i-1];
2642   }
2643 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2644   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2645 #else
2646   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2647 #endif
2648   /*--------------------------------------------------------------------
2649     Assemble the matrix into useable form (note numerical values not yet set)
2650   */
2651   /* set the b->ilen (length of each row) values */
2652   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2653   /* set the b->i indices */
2654   b->i[0] = 0;
2655   for (i=1; i<=A->rmap->N/bs; i++) {
2656     b->i[i] = b->i[i-1] + lens[i-1];
2657   }
2658   ierr = PetscFree(lens);CHKERRQ(ierr);
2659   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2660   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2661   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2662 
2663   if (A->symmetric){
2664     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2665   } else if (A->hermitian) {
2666     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2667   } else if (A->structurally_symmetric) {
2668     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2669   }
2670   *newmat = B;
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatSOR_MPIBAIJ"
2676 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2677 {
2678   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2679   PetscErrorCode ierr;
2680   Vec            bb1 = 0;
2681 
2682   PetscFunctionBegin;
2683   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2684     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2685   }
2686 
2687   if (flag == SOR_APPLY_UPPER) {
2688     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2689     PetscFunctionReturn(0);
2690   }
2691 
2692   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2693     if (flag & SOR_ZERO_INITIAL_GUESS) {
2694       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2695       its--;
2696     }
2697 
2698     while (its--) {
2699       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2700       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2701 
2702       /* update rhs: bb1 = bb - B*x */
2703       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2704       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2705 
2706       /* local sweep */
2707       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2708     }
2709   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2710     if (flag & SOR_ZERO_INITIAL_GUESS) {
2711       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2712       its--;
2713     }
2714     while (its--) {
2715       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2716       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717 
2718       /* update rhs: bb1 = bb - B*x */
2719       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2720       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2721 
2722       /* local sweep */
2723       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2724     }
2725   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2726     if (flag & SOR_ZERO_INITIAL_GUESS) {
2727       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2728       its--;
2729     }
2730     while (its--) {
2731       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2732       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2733 
2734       /* update rhs: bb1 = bb - B*x */
2735       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2736       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2737 
2738       /* local sweep */
2739       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2740     }
2741   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2742 
2743   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2744   PetscFunctionReturn(0);
2745 }
2746 
2747 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2748 
2749 
2750 /* -------------------------------------------------------------------*/
2751 static struct _MatOps MatOps_Values = {
2752        MatSetValues_MPIBAIJ,
2753        MatGetRow_MPIBAIJ,
2754        MatRestoreRow_MPIBAIJ,
2755        MatMult_MPIBAIJ,
2756 /* 4*/ MatMultAdd_MPIBAIJ,
2757        MatMultTranspose_MPIBAIJ,
2758        MatMultTransposeAdd_MPIBAIJ,
2759        0,
2760        0,
2761        0,
2762 /*10*/ 0,
2763        0,
2764        0,
2765        MatSOR_MPIBAIJ,
2766        MatTranspose_MPIBAIJ,
2767 /*15*/ MatGetInfo_MPIBAIJ,
2768        MatEqual_MPIBAIJ,
2769        MatGetDiagonal_MPIBAIJ,
2770        MatDiagonalScale_MPIBAIJ,
2771        MatNorm_MPIBAIJ,
2772 /*20*/ MatAssemblyBegin_MPIBAIJ,
2773        MatAssemblyEnd_MPIBAIJ,
2774        MatSetOption_MPIBAIJ,
2775        MatZeroEntries_MPIBAIJ,
2776 /*24*/ MatZeroRows_MPIBAIJ,
2777        0,
2778        0,
2779        0,
2780        0,
2781 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2782        0,
2783        0,
2784        0,
2785        0,
2786 /*34*/ MatDuplicate_MPIBAIJ,
2787        0,
2788        0,
2789        0,
2790        0,
2791 /*39*/ MatAXPY_MPIBAIJ,
2792        MatGetSubMatrices_MPIBAIJ,
2793        MatIncreaseOverlap_MPIBAIJ,
2794        MatGetValues_MPIBAIJ,
2795        MatCopy_MPIBAIJ,
2796 /*44*/ 0,
2797        MatScale_MPIBAIJ,
2798        0,
2799        0,
2800        0,
2801 /*49*/ MatSetBlockSize_MPIBAIJ,
2802        0,
2803        0,
2804        0,
2805        0,
2806 /*54*/ MatFDColoringCreate_MPIBAIJ,
2807        0,
2808        MatSetUnfactored_MPIBAIJ,
2809        MatPermute_MPIBAIJ,
2810        MatSetValuesBlocked_MPIBAIJ,
2811 /*59*/ MatGetSubMatrix_MPIBAIJ,
2812        MatDestroy_MPIBAIJ,
2813        MatView_MPIBAIJ,
2814        0,
2815        0,
2816 /*64*/ 0,
2817        0,
2818        0,
2819        0,
2820        0,
2821 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2822        0,
2823        0,
2824        0,
2825        0,
2826 /*74*/ 0,
2827        MatFDColoringApply_BAIJ,
2828        0,
2829        0,
2830        0,
2831 /*79*/ 0,
2832        0,
2833        0,
2834        0,
2835        MatLoad_MPIBAIJ,
2836 /*84*/ 0,
2837        0,
2838        0,
2839        0,
2840        0,
2841 /*89*/ 0,
2842        0,
2843        0,
2844        0,
2845        0,
2846 /*94*/ 0,
2847        0,
2848        0,
2849        0,
2850        0,
2851 /*99*/ 0,
2852        0,
2853        0,
2854        0,
2855        0,
2856 /*104*/0,
2857        MatRealPart_MPIBAIJ,
2858        MatImaginaryPart_MPIBAIJ,
2859        0,
2860        0,
2861 /*109*/0,
2862        0,
2863        0,
2864        0,
2865        0,
2866 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2867        0,
2868        MatGetGhosts_MPIBAIJ,
2869        0,
2870        0,
2871 /*119*/0,
2872        0,
2873        0,
2874        0
2875 };
2876 
2877 EXTERN_C_BEGIN
2878 #undef __FUNCT__
2879 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2880 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
2881 {
2882   PetscFunctionBegin;
2883   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2884   *iscopy = PETSC_FALSE;
2885   PetscFunctionReturn(0);
2886 }
2887 EXTERN_C_END
2888 
2889 EXTERN_C_BEGIN
2890 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2891 EXTERN_C_END
2892 
2893 EXTERN_C_BEGIN
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2896 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2897 {
2898   PetscInt       m,rstart,cstart,cend;
2899   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2900   const PetscInt *JJ=0;
2901   PetscScalar    *values=0;
2902   PetscErrorCode ierr;
2903 
2904   PetscFunctionBegin;
2905 
2906   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2907   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2908   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2909   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2910   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2911   m      = B->rmap->n/bs;
2912   rstart = B->rmap->rstart/bs;
2913   cstart = B->cmap->rstart/bs;
2914   cend   = B->cmap->rend/bs;
2915 
2916   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2917   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2918   for (i=0; i<m; i++) {
2919     nz = ii[i+1] - ii[i];
2920     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2921     nz_max = PetscMax(nz_max,nz);
2922     JJ  = jj + ii[i];
2923     for (j=0; j<nz; j++) {
2924       if (*JJ >= cstart) break;
2925       JJ++;
2926     }
2927     d = 0;
2928     for (; j<nz; j++) {
2929       if (*JJ++ >= cend) break;
2930       d++;
2931     }
2932     d_nnz[i] = d;
2933     o_nnz[i] = nz - d;
2934   }
2935   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2936   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2937 
2938   values = (PetscScalar*)V;
2939   if (!values) {
2940     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2941     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2942   }
2943   for (i=0; i<m; i++) {
2944     PetscInt          row    = i + rstart;
2945     PetscInt          ncols  = ii[i+1] - ii[i];
2946     const PetscInt    *icols = jj + ii[i];
2947     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2948     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2949   }
2950 
2951   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2952   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954 
2955   PetscFunctionReturn(0);
2956 }
2957 EXTERN_C_END
2958 
2959 #undef __FUNCT__
2960 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2961 /*@C
2962    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2963    (block compressed row).
2964 
2965    Collective on MPI_Comm
2966 
2967    Input Parameters:
2968 +  A - the matrix
2969 .  bs - the block size
2970 .  i - the indices into j for the start of each local row (starts with zero)
2971 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2972 -  v - optional values in the matrix
2973 
2974    Level: developer
2975 
2976 .keywords: matrix, aij, compressed row, sparse, parallel
2977 
2978 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2979 @*/
2980 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2981 {
2982   PetscErrorCode ierr;
2983 
2984   PetscFunctionBegin;
2985   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2986   PetscFunctionReturn(0);
2987 }
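/*
   A minimal usage sketch for MatMPIBAIJSetPreallocationCSR (illustrative only, not part of the PETSc
   sources; the block size, the array contents and the matrix B below are assumptions chosen to show
   the calling sequence for a process owning two block rows):

     PetscInt    bs   = 2;
     PetscInt    ii[] = {0,2,3};        block-row pointers: block rows with 2 and 1 block columns
     PetscInt    jj[] = {0,1,1};        sorted global block-column indices for each block row
     PetscScalar vv[3*2*2];             bs*bs values for each of the 3 blocks, filled by the caller
     ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
*/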
2988 
2989 EXTERN_C_BEGIN
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2992 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2993 {
2994   Mat_MPIBAIJ    *b;
2995   PetscErrorCode ierr;
2996   PetscInt       i, newbs = PetscAbs(bs);
2997 
2998   PetscFunctionBegin;
2999   if (bs < 0) {
3000     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3001       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3002     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3003     bs   = PetscAbs(bs);
3004   }
3005   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3006   bs = newbs;
3007 
3008 
3009   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3010   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3011   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3012   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3013   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3014 
3015   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3016   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3017   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3018   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3019 
3020   if (d_nnz) {
3021     for (i=0; i<B->rmap->n/bs; i++) {
3022       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3023     }
3024   }
3025   if (o_nnz) {
3026     for (i=0; i<B->rmap->n/bs; i++) {
3027       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3028     }
3029   }
3030 
3031   b = (Mat_MPIBAIJ*)B->data;
3032   b->bs2 = bs*bs;
3033   b->mbs = B->rmap->n/bs;
3034   b->nbs = B->cmap->n/bs;
3035   b->Mbs = B->rmap->N/bs;
3036   b->Nbs = B->cmap->N/bs;
3037 
3038   for (i=0; i<=b->size; i++) {
3039     b->rangebs[i] = B->rmap->range[i]/bs;
3040   }
3041   b->rstartbs = B->rmap->rstart/bs;
3042   b->rendbs   = B->rmap->rend/bs;
3043   b->cstartbs = B->cmap->rstart/bs;
3044   b->cendbs   = B->cmap->rend/bs;
3045 
3046   if (!B->preallocated) {
3047     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3048     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3049     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3050     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3051     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3052     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3053     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3054     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3055     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3056   }
3057 
3058   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3059   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3060   B->preallocated = PETSC_TRUE;
3061   PetscFunctionReturn(0);
3062 }
3063 EXTERN_C_END
3064 
3065 EXTERN_C_BEGIN
3066 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3067 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3068 EXTERN_C_END
3069 
3070 
3071 EXTERN_C_BEGIN
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3074 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3075 {
3076   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3077   PetscErrorCode ierr;
3078   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3079   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3080   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3081 
3082   PetscFunctionBegin;
3083   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3084   ii[0] = 0;
3085   CHKMEMQ;
3086   for (i=0; i<M; i++) {
3087     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3088     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3089     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3090     /* remove one from the count if this row of the matrix has a diagonal entry */
3091     for (j=id[i]; j<id[i+1]; j++) {
3092       if (jd[j] == i) {ii[i+1]--;break;}
3093     }
3094   CHKMEMQ;
3095   }
3096   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3097   cnt = 0;
3098   for (i=0; i<M; i++) {
3099     for (j=io[i]; j<io[i+1]; j++) {
3100       if (garray[jo[j]] > rstart) break;
3101       jj[cnt++] = garray[jo[j]];
3102   CHKMEMQ;
3103     }
3104     for (k=id[i]; k<id[i+1]; k++) {
3105       if (jd[k] != i) {
3106         jj[cnt++] = rstart + jd[k];
3107   CHKMEMQ;
3108       }
3109     }
3110     for (;j<io[i+1]; j++) {
3111       jj[cnt++] = garray[jo[j]];
3112   CHKMEMQ;
3113     }
3114   }
3115   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3116   PetscFunctionReturn(0);
3117 }
3118 EXTERN_C_END
3119 
3120 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3121 EXTERN_C_BEGIN
3122 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3123 EXTERN_C_END
3124 
3125 EXTERN_C_BEGIN
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3128 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3129 {
3130   PetscErrorCode ierr;
3131   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3132   Mat            B;
3133   Mat_MPIAIJ     *b;
3134 
3135   PetscFunctionBegin;
3136   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3137 
3138   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3139   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3140   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3141   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3142   b = (Mat_MPIAIJ*) B->data;
3143 
3144   ierr = MatDestroy(b->A);CHKERRQ(ierr);
3145   ierr = MatDestroy(b->B);CHKERRQ(ierr);
3146   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3147   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3148   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3149   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3150   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3151   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3152   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153   if (reuse == MAT_REUSE_MATRIX) {
3154     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3155   } else {
3156    *newmat = B;
3157   }
3158   PetscFunctionReturn(0);
3159 }
3160 EXTERN_C_END
3161 
3162 EXTERN_C_BEGIN
3163 #if defined(PETSC_HAVE_MUMPS)
3164 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3165 #endif
3166 EXTERN_C_END
3167 
3168 /*MC
3169    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3170 
3171    Options Database Keys:
3172 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3173 . -mat_block_size <bs> - set the blocksize used to store the matrix
3174 - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> sets the hash table size factor
3175 
3176   Level: beginner
3177 
3178 .seealso: MatCreateMPIBAIJ()
3179 M*/
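/*
   A minimal sketch of selecting this type at run time (illustrative only, not part of the PETSc sources;
   comm, m, n, M, N, bs and A are assumed to be provided by the caller):

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);        or use -mat_type mpibaij together with MatSetFromOptions()
     ierr = MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/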
3180 
3181 EXTERN_C_BEGIN
3182 #undef __FUNCT__
3183 #define __FUNCT__ "MatCreate_MPIBAIJ"
3184 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3185 {
3186   Mat_MPIBAIJ    *b;
3187   PetscErrorCode ierr;
3188   PetscBool      flg;
3189 
3190   PetscFunctionBegin;
3191   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3192   B->data = (void*)b;
3193 
3194   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3195   B->assembled  = PETSC_FALSE;
3196 
3197   B->insertmode = NOT_SET_VALUES;
3198   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3199   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3200 
3201   /* build local table of row and column ownerships */
3202   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3203 
3204   /* build cache for off array entries formed */
3205   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3206   b->donotstash  = PETSC_FALSE;
3207   b->colmap      = PETSC_NULL;
3208   b->garray      = PETSC_NULL;
3209   b->roworiented = PETSC_TRUE;
3210 
3211   /* stuff used in block assembly */
3212   b->barray       = 0;
3213 
3214   /* stuff used for matrix vector multiply */
3215   b->lvec         = 0;
3216   b->Mvctx        = 0;
3217 
3218   /* stuff for MatGetRow() */
3219   b->rowindices   = 0;
3220   b->rowvalues    = 0;
3221   b->getrowactive = PETSC_FALSE;
3222 
3223   /* hash table stuff */
3224   b->ht           = 0;
3225   b->hd           = 0;
3226   b->ht_size      = 0;
3227   b->ht_flag      = PETSC_FALSE;
3228   b->ht_fact      = 0;
3229   b->ht_total_ct  = 0;
3230   b->ht_insert_ct = 0;
3231 
3232   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3233   b->ijonly       = PETSC_FALSE;
3234 
3235   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3236     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3237     if (flg) {
3238       PetscReal fact = 1.39;
3239       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3240       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3241       if (fact <= 1.0) fact = 1.39;
3242       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3243       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3244     }
3245   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3246 
3247 #if defined(PETSC_HAVE_MUMPS)
3248   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3249 #endif
3250   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3251                                      "MatConvert_MPIBAIJ_MPIAdj",
3252                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3253   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3254                                      "MatConvert_MPIBAIJ_MPIAIJ",
3255                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3256   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3257                                      "MatStoreValues_MPIBAIJ",
3258                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3259   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3260                                      "MatRetrieveValues_MPIBAIJ",
3261                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3262   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3263                                      "MatGetDiagonalBlock_MPIBAIJ",
3264                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3265   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3266                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3267                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3268   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3269 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3270 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3271   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3272                                      "MatDiagonalScaleLocal_MPIBAIJ",
3273                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3275                                      "MatSetHashTableFactor_MPIBAIJ",
3276                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3277   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3278   PetscFunctionReturn(0);
3279 }
3280 EXTERN_C_END
3281 
3282 /*MC
3283    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3284 
3285    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3286    and MATMPIBAIJ otherwise.
3287 
3288    Options Database Keys:
3289 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3290 
3291   Level: beginner
3292 
3293 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3294 M*/
3295 
3296 EXTERN_C_BEGIN
3297 #undef __FUNCT__
3298 #define __FUNCT__ "MatCreate_BAIJ"
3299 PetscErrorCode  MatCreate_BAIJ(Mat A)
3300 {
3301   PetscErrorCode ierr;
3302   PetscMPIInt    size;
3303 
3304   PetscFunctionBegin;
3305   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3306   if (size == 1) {
3307     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3308   } else {
3309     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3310   }
3311   PetscFunctionReturn(0);
3312 }
3313 EXTERN_C_END
3314 
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3317 /*@C
3318    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3319    (block compressed row).  For good matrix assembly performance
3320    the user should preallocate the matrix storage by setting the parameters
3321    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3322    performance can be increased by more than a factor of 50.
3323 
3324    Collective on Mat
3325 
3326    Input Parameters:
3327 +  A - the matrix
3328 .  bs   - size of block
3329 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3330            submatrix  (same for all local rows)
3331 .  d_nnz - array containing the number of block nonzeros in the various block rows
3332            in the diagonal portion of the local submatrix (possibly different for each block
3333            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3334 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3335            submatrix (same for all local rows).
3336 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3337            off-diagonal portion of the local submatrix (possibly different for
3338            each block row) or PETSC_NULL.
3339 
3340    If the *_nnz parameter is given then the *_nz parameter is ignored
3341 
3342    Options Database Keys:
3343 +   -mat_block_size - size of the blocks to use
3344 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> sets the hash table size factor
3345 
3346    Notes:
3347    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3348    then it must be used on all processors that share the object for that argument.
3349 
3350    Storage Information:
3351    For a square global matrix we define each processor's diagonal portion
3352    to be its local rows and the corresponding columns (a square submatrix);
3353    each processor's off-diagonal portion encompasses the remainder of the
3354    local matrix (a rectangular submatrix).
3355 
3356    The user can specify preallocated storage for the diagonal part of
3357    the local submatrix with either d_nz or d_nnz (not both).  Set
3358    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3359    memory allocation.  Likewise, specify preallocated storage for the
3360    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3361 
3362    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3363    the figure below we depict these three local rows and all columns (0-11).
3364 
3365 .vb
3366            0 1 2 3 4 5 6 7 8 9 10 11
3367           -------------------
3368    row 3  |  o o o d d d o o o o o o
3369    row 4  |  o o o d d d o o o o o o
3370    row 5  |  o o o d d d o o o o o o
3371           -------------------
3372 .ve
3373 
3374    Thus, any entries in the d locations are stored in the d (diagonal)
3375    submatrix, and any entries in the o locations are stored in the
3376    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3377    stored simply in the MATSEQBAIJ format for compressed row storage.
3378 
3379    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3380    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3381    In general, for PDE problems in which most nonzeros are near the diagonal,
3382    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3383    or you will get TERRIBLE performance; see the users' manual chapter on
3384    matrices.
3385 
3386    You can call MatGetInfo() to get information on how effective the preallocation was;
3387    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3388    You can also run with the option -info and look for messages with the string
3389    malloc in them to see if additional memory allocation was needed.
3390 
3391    Level: intermediate
3392 
3393 .keywords: matrix, block, aij, compressed row, sparse, parallel
3394 
3395 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3396 @*/
3397 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3398 {
3399   PetscErrorCode ierr;
3400 
3401   PetscFunctionBegin;
3402   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3403   PetscFunctionReturn(0);
3404 }
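/*
   A minimal preallocation sketch (illustrative only, not part of the PETSc sources; the matrix B,
   the block size 2 and the per-block-row counts below are assumptions chosen to show how d_nnz and
   o_nnz are passed for a process owning three block rows):

     PetscInt d_nnz[] = {3,3,2};        block nonzeros per local block row, diagonal part
     PetscInt o_nnz[] = {1,0,1};        block nonzeros per local block row, off-diagonal part
     ierr = MatMPIBAIJSetPreallocation(B,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/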
3405 
3406 #undef __FUNCT__
3407 #define __FUNCT__ "MatCreateMPIBAIJ"
3408 /*@C
3409    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3410    (block compressed row).  For good matrix assembly performance
3411    the user should preallocate the matrix storage by setting the parameters
3412    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3413    performance can be increased by more than a factor of 50.
3414 
3415    Collective on MPI_Comm
3416 
3417    Input Parameters:
3418 +  comm - MPI communicator
3419 .  bs   - size of block
3420 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3421            This value should be the same as the local size used in creating the
3422            y vector for the matrix-vector product y = Ax.
3423 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3424            This value should be the same as the local size used in creating the
3425            x vector for the matrix-vector product y = Ax.
3426 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3427 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3428 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3429            submatrix  (same for all local rows)
3430 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3431            in the diagonal portion of the local submatrix (possibly different for each block
3432            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3433 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3434            submatrix (same for all local rows).
3435 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3436            off-diagonal portion of the local submatrix (possibly different for
3437            each block row) or PETSC_NULL.
3438 
3439    Output Parameter:
3440 .  A - the matrix
3441 
3442    Options Database Keys:
3443 +   -mat_block_size - size of the blocks to use
3444 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> sets the hash table size factor
3445 
3446    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3447    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3448    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3449 
3450    Notes:
3451    If the *_nnz parameter is given then the *_nz parameter is ignored
3452 
3453    A nonzero block is any block that has 1 or more nonzeros in it
3454 
3455    The user MUST specify either the local or global matrix dimensions
3456    (possibly both).
3457 
3458    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3459    then it must be used on all processors that share the object for that argument.
3460 
3461    Storage Information:
3462    For a square global matrix we define each processor's diagonal portion
3463    to be its local rows and the corresponding columns (a square submatrix);
3464    each processor's off-diagonal portion encompasses the remainder of the
3465    local matrix (a rectangular submatrix).
3466 
3467    The user can specify preallocated storage for the diagonal part of
3468    the local submatrix with either d_nz or d_nnz (not both).  Set
3469    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3470    memory allocation.  Likewise, specify preallocated storage for the
3471    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3472 
3473    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3474    the figure below we depict these three local rows and all columns (0-11).
3475 
3476 .vb
3477            0 1 2 3 4 5 6 7 8 9 10 11
3478           -------------------
3479    row 3  |  o o o d d d o o o o o o
3480    row 4  |  o o o d d d o o o o o o
3481    row 5  |  o o o d d d o o o o o o
3482           -------------------
3483 .ve
3484 
3485    Thus, any entries in the d locations are stored in the d (diagonal)
3486    submatrix, and any entries in the o locations are stored in the
3487    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3488    stored simply in the MATSEQBAIJ format for compressed row storage.
3489 
3490    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3491    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3492    In general, for PDE problems in which most nonzeros are near the diagonal,
3493    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3494    or you will get TERRIBLE performance; see the users' manual chapter on
3495    matrices.
3496 
3497    Level: intermediate
3498 
3499 .keywords: matrix, block, aij, compressed row, sparse, parallel
3500 
3501 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3502 @*/
3503 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3504 {
3505   PetscErrorCode ierr;
3506   PetscMPIInt    size;
3507 
3508   PetscFunctionBegin;
3509   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3510   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3511   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3512   if (size > 1) {
3513     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3514     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3515   } else {
3516     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3517     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3518   }
3519   PetscFunctionReturn(0);
3520 }
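/*
   A minimal creation sketch (illustrative only, not part of the PETSc sources; the global size 100,
   the block size 2 and the per-block-row estimates 5 and 2 are assumptions chosen to show the calling
   sequence):

     Mat A;
     ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/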
3521 
3522 #undef __FUNCT__
3523 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3524 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3525 {
3526   Mat            mat;
3527   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3528   PetscErrorCode ierr;
3529   PetscInt       len=0;
3530 
3531   PetscFunctionBegin;
3532   *newmat       = 0;
3533   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3534   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3535   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3536   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3537 
3538   mat->factortype   = matin->factortype;
3539   mat->preallocated = PETSC_TRUE;
3540   mat->assembled    = PETSC_TRUE;
3541   mat->insertmode   = NOT_SET_VALUES;
3542 
3543   a      = (Mat_MPIBAIJ*)mat->data;
3544   mat->rmap->bs  = matin->rmap->bs;
3545   a->bs2   = oldmat->bs2;
3546   a->mbs   = oldmat->mbs;
3547   a->nbs   = oldmat->nbs;
3548   a->Mbs   = oldmat->Mbs;
3549   a->Nbs   = oldmat->Nbs;
3550 
3551   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3552   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3553 
3554   a->size         = oldmat->size;
3555   a->rank         = oldmat->rank;
3556   a->donotstash   = oldmat->donotstash;
3557   a->roworiented  = oldmat->roworiented;
3558   a->rowindices   = 0;
3559   a->rowvalues    = 0;
3560   a->getrowactive = PETSC_FALSE;
3561   a->barray       = 0;
3562   a->rstartbs     = oldmat->rstartbs;
3563   a->rendbs       = oldmat->rendbs;
3564   a->cstartbs     = oldmat->cstartbs;
3565   a->cendbs       = oldmat->cendbs;
3566 
3567   /* hash table stuff */
3568   a->ht           = 0;
3569   a->hd           = 0;
3570   a->ht_size      = 0;
3571   a->ht_flag      = oldmat->ht_flag;
3572   a->ht_fact      = oldmat->ht_fact;
3573   a->ht_total_ct  = 0;
3574   a->ht_insert_ct = 0;
3575 
3576   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3577   if (oldmat->colmap) {
3578 #if defined (PETSC_USE_CTABLE)
3579   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3580 #else
3581   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3582   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3583   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3584 #endif
3585   } else a->colmap = 0;
3586 
3587   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3588     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3589     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3590     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3591   } else a->garray = 0;
3592 
3593   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3594   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3595   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3596   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3597   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3598 
3599   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3600   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3601   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3602   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3603   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3604   *newmat = mat;
3605 
3606   PetscFunctionReturn(0);
3607 }
3608 
3609 #undef __FUNCT__
3610 #define __FUNCT__ "MatLoad_MPIBAIJ"
3611 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3612 {
3613   PetscErrorCode ierr;
3614   int            fd;
3615   PetscInt       i,nz,j,rstart,rend;
3616   PetscScalar    *vals,*buf;
3617   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3618   MPI_Status     status;
3619   PetscMPIInt    rank,size,maxnz;
3620   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3621   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3622   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3623   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3624   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3625   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3626 
3627   PetscFunctionBegin;
3628   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3629     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3630   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3631 
3632   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3633   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3634   if (!rank) {
3635     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3636     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3637     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3638   }
3639 
3640   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3641 
3642   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3643   M = header[1]; N = header[2];
3644 
3645   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3646   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3647   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3648 
3649   /* If global sizes are set, check if they are consistent with that given in the file */
3650   if (sizesset) {
3651     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3652   }
3653   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3654   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3655 
3656   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3657 
3658   /*
3659      This code adds extra rows to make sure the number of rows is
3660      divisible by the blocksize
3661   */
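  /* e.g. with bs = 3 and M = 10: Mbs = 10/3 = 3, extra_rows = 3 - 10 + 9 = 2 (nonzero), so Mbs is bumped to 4 below */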
3662   Mbs        = M/bs;
3663   extra_rows = bs - M + bs*Mbs;
3664   if (extra_rows == bs) extra_rows = 0;
3665   else                  Mbs++;
3666   if (extra_rows && !rank) {
3667     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3668   }
3669 
3670   /* determine ownership of all rows */
3671   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3672     mbs        = Mbs/size + ((Mbs % size) > rank);
3673     m          = mbs*bs;
3674   } else { /* User set */
3675     m          = newmat->rmap->n;
3676     mbs        = m/bs;
3677   }
3678   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3679   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3680 
3681   /* process 0 needs enough room for process with most rows */
3682   if (!rank) {
3683     mmax = rowners[1];
3684     for (i=2; i<size; i++) {
3685       mmax = PetscMax(mmax,rowners[i]);
3686     }
3687     mmax*=bs;
3688   } else mmax = m;
3689 
3690   rowners[0] = 0;
3691   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3692   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3693   rstart = rowners[rank];
3694   rend   = rowners[rank+1];
3695 
3696   /* distribute row lengths to all processors */
3697   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3698   if (!rank) {
3699     mend = m;
3700     if (size == 1) mend = mend - extra_rows;
3701     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3702     for (j=mend; j<m; j++) locrowlens[j] = 1;
3703     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3704     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3705     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3706     for (j=0; j<m; j++) {
3707       procsnz[0] += locrowlens[j];
3708     }
3709     for (i=1; i<size; i++) {
3710       mend = browners[i+1] - browners[i];
3711       if (i == size-1) mend = mend - extra_rows;
3712       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3713       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3714       /* calculate the number of nonzeros on each processor */
3715       for (j=0; j<browners[i+1]-browners[i]; j++) {
3716         procsnz[i] += rowlengths[j];
3717       }
3718       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3719     }
3720     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3721   } else {
3722     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3723   }
3724 
3725   if (!rank) {
3726     /* determine max buffer needed and allocate it */
3727     maxnz = procsnz[0];
3728     for (i=1; i<size; i++) {
3729       maxnz = PetscMax(maxnz,procsnz[i]);
3730     }
3731     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3732 
3733     /* read in my part of the matrix column indices  */
3734     nz     = procsnz[0];
3735     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3736     mycols = ibuf;
3737     if (size == 1)  nz -= extra_rows;
3738     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3739     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3740 
3741     /* read in the other processes' column indices (except the last one) and ship them off */
3742     for (i=1; i<size-1; i++) {
3743       nz   = procsnz[i];
3744       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3745       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3746     }
3747     /* read in the stuff for the last proc */
3748     if (size != 1) {
3749       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3750       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3751       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3752       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3753     }
3754     ierr = PetscFree(cols);CHKERRQ(ierr);
3755   } else {
3756     /* determine buffer space needed for message */
3757     nz = 0;
3758     for (i=0; i<m; i++) {
3759       nz += locrowlens[i];
3760     }
3761     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3762     mycols = ibuf;
3763     /* receive message of column indices*/
3764     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3765     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3766     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3767   }
3768 
3769   /* loop over local rows, determining number of off diagonal entries */
3770   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3771   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3772   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3773   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3774   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3775   rowcount = 0; nzcount = 0;
3776   for (i=0; i<mbs; i++) {
3777     dcount  = 0;
3778     odcount = 0;
3779     for (j=0; j<bs; j++) {
3780       kmax = locrowlens[rowcount];
3781       for (k=0; k<kmax; k++) {
3782         tmp = mycols[nzcount++]/bs;
3783         if (!mask[tmp]) {
3784           mask[tmp] = 1;
3785           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3786           else masked1[dcount++] = tmp;
3787         }
3788       }
3789       rowcount++;
3790     }
3791 
3792     dlens[i]  = dcount;
3793     odlens[i] = odcount;
3794 
3795     /* zero out the mask elements we set */
3796     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3797     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3798   }
3799 
3800 
3801   if (!sizesset) {
3802     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3803   }
3804   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3805 
3806   if (!rank) {
3807     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3808     /* read in my part of the matrix numerical values  */
3809     nz = procsnz[0];
3810     vals = buf;
3811     mycols = ibuf;
3812     if (size == 1)  nz -= extra_rows;
3813     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3814     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3815 
3816     /* insert into matrix */
3817     jj      = rstart*bs;
3818     for (i=0; i<m; i++) {
3819       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3820       mycols += locrowlens[i];
3821       vals   += locrowlens[i];
3822       jj++;
3823     }
3824     /* read in other processors (except the last one) and ship out */
3825     for (i=1; i<size-1; i++) {
3826       nz   = procsnz[i];
3827       vals = buf;
3828       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3829       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3830     }
3831     /* the last proc */
3832     if (size != 1){
3833       nz   = procsnz[i] - extra_rows;
3834       vals = buf;
3835       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3836       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3837       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3838     }
3839     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3840   } else {
3841     /* receive numeric values */
3842     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3843 
3844     /* receive message of values*/
3845     vals   = buf;
3846     mycols = ibuf;
3847     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3848     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3849     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3850 
3851     /* insert into matrix */
3852     jj      = rstart*bs;
3853     for (i=0; i<m; i++) {
3854       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3855       mycols += locrowlens[i];
3856       vals   += locrowlens[i];
3857       jj++;
3858     }
3859   }
3860   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3861   ierr = PetscFree(buf);CHKERRQ(ierr);
3862   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3863   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3864   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3865   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3866   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3867   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3868 
3869   PetscFunctionReturn(0);
3870 }
3871 
3872 #undef __FUNCT__
3873 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3874 /*@
3875    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3876 
3877    Input Parameters:
3878 .  mat  - the matrix
3879 .  fact - factor
3880 
3881    Not Collective, each process can use a different factor
3882 
3883    Level: advanced
3884 
3885   Notes:
3886    This can also be set by the command line option: -mat_use_hash_table <fact>
3887 
3888 .keywords: matrix, hashtable, factor, HT
3889 
3890 .seealso: MatSetOption()
3891 @*/
3892 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3893 {
3894   PetscErrorCode ierr;
3895 
3896   PetscFunctionBegin;
3897   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
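/*
   A minimal usage sketch (illustrative only, not part of the PETSc sources; mat is assumed to be an
   MPIBAIJ matrix created by the caller, and 1.6 is just an example factor greater than 1.0):

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);
*/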
3900 
3901 EXTERN_C_BEGIN
3902 #undef __FUNCT__
3903 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3904 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3905 {
3906   Mat_MPIBAIJ *baij;
3907 
3908   PetscFunctionBegin;
3909   baij = (Mat_MPIBAIJ*)mat->data;
3910   baij->ht_fact = fact;
3911   PetscFunctionReturn(0);
3912 }
3913 EXTERN_C_END
3914 
3915 #undef __FUNCT__
3916 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3917 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3918 {
3919   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3920   PetscFunctionBegin;
3921   *Ad     = a->A;
3922   *Ao     = a->B;
3923   *colmap = a->garray;
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 /*
3928     Special version for direct calls from Fortran (to eliminate two function call overheads
3929 */
3930 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3931 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3932 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3933 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3934 #endif
3935 
3936 #undef __FUNCT__
3937 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3938 /*@C
3939   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3940 
3941   Collective on Mat
3942 
3943   Input Parameters:
3944 + mat - the matrix
3945 . min - number of input rows
3946 . im - input rows
3947 . nin - number of input columns
3948 . in - input columns
3949 . v - numerical values input
3950 - addvin - INSERT_VALUES or ADD_VALUES
3951 
3952   Notes: This is a complete copy of MatSetValuesBlocked_MPIBAIJ(); the code duplication is unfortunate, but it eliminates the extra function call overhead for direct Fortran calls.
3953 
3954   Level: advanced
3955 
3956 .seealso:   MatSetValuesBlocked()
3957 @*/
3958 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3959 {
3960   /* convert input arguments to C version */
3961   Mat             mat = *matin;
3962   PetscInt        m = *min, n = *nin;
3963   InsertMode      addv = *addvin;
3964 
3965   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3966   const MatScalar *value;
3967   MatScalar       *barray=baij->barray;
3968   PetscBool       roworiented = baij->roworiented;
3969   PetscErrorCode  ierr;
3970   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3971   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3972   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3973 
3974   PetscFunctionBegin;
3975   /* tasks normally handled by MatSetValuesBlocked() */
3976   if (mat->insertmode == NOT_SET_VALUES) {
3977     mat->insertmode = addv;
3978   }
3979 #if defined(PETSC_USE_DEBUG)
3980   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3981   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3982 #endif
3983   if (mat->assembled) {
3984     mat->was_assembled = PETSC_TRUE;
3985     mat->assembled     = PETSC_FALSE;
3986   }
3987   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3988 
3989 
3990   if(!barray) {
3991     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3992     baij->barray = barray;
3993   }
3994 
3995   if (roworiented) {
3996     stepval = (n-1)*bs;
3997   } else {
3998     stepval = (m-1)*bs;
3999   }
4000   for (i=0; i<m; i++) {
4001     if (im[i] < 0) continue;
4002 #if defined(PETSC_USE_DEBUG)
4003     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4004 #endif
4005     if (im[i] >= rstart && im[i] < rend) {
4006       row = im[i] - rstart;
4007       for (j=0; j<n; j++) {
4008         /* If NumCol = 1 then a copy is not required */
4009         if ((roworiented) && (n == 1)) {
4010           barray = (MatScalar*)v + i*bs2;
4011         } else if((!roworiented) && (m == 1)) {
4012           barray = (MatScalar*)v + j*bs2;
4013         } else { /* Here a copy is required */
4014           if (roworiented) {
4015             value = v + i*(stepval+bs)*bs + j*bs;
4016           } else {
4017             value = v + j*(stepval+bs)*bs + i*bs;
4018           }
4019           for (ii=0; ii<bs; ii++,value+=stepval) {
4020             for (jj=0; jj<bs; jj++) {
4021               *barray++  = *value++;
4022             }
4023           }
4024           barray -=bs2;
4025         }
4026 
4027         if (in[j] >= cstart && in[j] < cend){
4028           col  = in[j] - cstart;
4029           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4030         }
4031         else if (in[j] < 0) continue;
4032 #if defined(PETSC_USE_DEBUG)
4033         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4034 #endif
4035         else {
4036           if (mat->was_assembled) {
4037             if (!baij->colmap) {
4038               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4039             }
4040 
4041 #if defined(PETSC_USE_DEBUG)
4042 #if defined (PETSC_USE_CTABLE)
4043             { PetscInt data;
4044               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4045               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4046             }
4047 #else
4048             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4049 #endif
4050 #endif
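            /* colmap maps a global block column to 1 + bs*(local block column in B);
               the -1 and the division by bs below recover the local block column */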
4051 #if defined (PETSC_USE_CTABLE)
4052             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4053             col  = (col - 1)/bs;
4054 #else
4055             col = (baij->colmap[in[j]] - 1)/bs;
4056 #endif
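            /* the block column is not yet present in the off-diagonal part; unless new nonzeros are
               forbidden, convert the matrix back to an unassembled (global numbering) representation */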
4057             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4058               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4059               col =  in[j];
4060             }
4061           }
4062           else col = in[j];
4063           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4064         }
4065       }
4066     } else {
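      /* this block row is owned by another process: stash the values for communication during
         MatAssemblyBegin()/MatAssemblyEnd() */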
4067       if (!baij->donotstash) {
4068         if (roworiented) {
4069           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4070         } else {
4071           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4072         }
4073       }
4074     }
4075   }
4076 
4077   /* task normally handled by MatSetValuesBlocked() */
4078   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4079   PetscFunctionReturn(0);
4080 }
4081 
4082 #undef __FUNCT__
4083 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4084 /*@
4085      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4086          in standard CSR format.
4087 
4088    Collective on MPI_Comm
4089 
4090    Input Parameters:
4091 +  comm - MPI communicator
4092 .  bs - the block size, only a block size of 1 is supported
4093 .  m - number of local rows (Cannot be PETSC_DECIDE)
4094 .  n - This value should be the same as the local size used in creating the
4095        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4096        calculated if N is given). For square matrices n is almost always m.
4097 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4098 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4099 .  i - row offsets (of length m+1, standard CSR: i[0] must be 0 and i[r] is the index into j of the first entry of row r)
4100 .  j - column indices
4101 -  a - matrix values
4102 
4103    Output Parameter:
4104 .   mat - the matrix
4105 
4106    Level: intermediate
4107 
4108    Notes:
4109        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4110      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4111      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4112 
4113        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
4114 
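   Example (a hedged sketch, not part of this source; two local rows, block size 1, run on one process;
   ii, jj, and aa are hypothetical names):
.vb
      PetscInt    ii[] = {0,2,3};           /* CSR row offsets into jj[] and aa[] */
      PetscInt    jj[] = {0,1,1};           /* global column indices */
      PetscScalar aa[] = {2.0,-1.0,3.0};    /* values */
      Mat         A;
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);
.ve
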
4115 .keywords: matrix, baij, compressed row, sparse, parallel
4116 
4117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4118           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4119 @*/
4120 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4121 {
4122   PetscErrorCode ierr;
4123 
4124 
4125   PetscFunctionBegin;
4126   if (i[0]) {
4127     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4128   }
4129   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4130   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4131   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4132   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4133   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4134   PetscFunctionReturn(0);
4135 }
4136