xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 3c48a1e8da19189ff2402a4e41a2fc082d52c349)
1 
2 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 #include "petscblaslapack.h"
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
28   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
29   if (idx) {
30     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++){
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
40   }
41 
42   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
43   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
44   ierr = PetscFree(idxb);CHKERRQ(ierr);
45   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 EXTERN_C_BEGIN
50 #undef __FUNCT__
51 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
52 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
53 {
54   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
59   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 EXTERN_C_END
63 
64 EXTERN_C_BEGIN
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 EXTERN_C_END
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scalable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined (PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++){
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110 { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115       bcol = col/bs; \
116       ridx = row % bs; cidx = col % bs; \
117       low = 0; high = nrow; \
118       while (high-low > 3) { \
119         t = (low+high)/2; \
120         if (rp[t] > bcol) high = t; \
121         else              low  = t; \
122       } \
123       for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > bcol) break; \
125         if (rp[_i] == bcol) { \
126           bap  = ap +  bs2*_i + bs*cidx + ridx; \
127           if (addv == ADD_VALUES) *bap += value;  \
128           else                    *bap  = value;  \
129           goto a_noinsert; \
130         } \
131       } \
132       if (a->nonew == 1) goto a_noinsert; \
133       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135       N = nrow++ - 1;  \
136       /* shift up all the later entries in this row */ \
137       for (ii=N; ii>=_i; ii--) { \
138         rp[ii+1] = rp[ii]; \
139         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140       } \
141       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142       rp[_i]                      = bcol;  \
143       ap[bs2*_i + bs*cidx + ridx] = value;  \
144       a_noinsert:; \
145     ailen[brow] = nrow; \
146 }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149 { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153       bcol = col/bs; \
154       ridx = row % bs; cidx = col % bs; \
155       low = 0; high = nrow; \
156       while (high-low > 3) { \
157         t = (low+high)/2; \
158         if (rp[t] > bcol) high = t; \
159         else              low  = t; \
160       } \
161       for (_i=low; _i<high; _i++) { \
162         if (rp[_i] > bcol) break; \
163         if (rp[_i] == bcol) { \
164           bap  = ap +  bs2*_i + bs*cidx + ridx; \
165           if (addv == ADD_VALUES) *bap += value;  \
166           else                    *bap  = value;  \
167           goto b_noinsert; \
168         } \
169       } \
170       if (b->nonew == 1) goto b_noinsert; \
171       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173       CHKMEMQ;\
174       N = nrow++ - 1;  \
175       /* shift up all the later entries in this row */ \
176       for (ii=N; ii>=_i; ii--) { \
177         rp[ii+1] = rp[ii]; \
178         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
179       } \
180       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
181       rp[_i]                      = bcol;  \
182       ap[bs2*_i + bs*cidx + ridx] = value;  \
183       b_noinsert:; \
184     bilen[brow] = nrow; \
185 }
186 
187 #undef __FUNCT__
188 #define __FUNCT__ "MatSetValues_MPIBAIJ"
189 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
190 {
191   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
192   MatScalar      value;
193   PetscBool      roworiented = baij->roworiented;
194   PetscErrorCode ierr;
195   PetscInt       i,j,row,col;
196   PetscInt       rstart_orig=mat->rmap->rstart;
197   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
198   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
199 
200   /* Some Variables required in the macro */
201   Mat            A = baij->A;
202   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
203   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204   MatScalar      *aa=a->a;
205 
206   Mat            B = baij->B;
207   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
208   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209   MatScalar      *ba=b->a;
210 
211   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
212   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
213   MatScalar      *ap,*bap;
214 
215   PetscFunctionBegin;
216   if (v) PetscValidScalarPointer(v,6);
217   for (i=0; i<m; i++) {
218     if (im[i] < 0) continue;
219 #if defined(PETSC_USE_DEBUG)
220     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
221 #endif
222     if (im[i] >= rstart_orig && im[i] < rend_orig) {
223       row = im[i] - rstart_orig;
224       for (j=0; j<n; j++) {
225         if (in[j] >= cstart_orig && in[j] < cend_orig){
226           col = in[j] - cstart_orig;
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         } else if (in[j] < 0) continue;
231 #if defined(PETSC_USE_DEBUG)
232         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined (PETSC_USE_CTABLE)
240             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
241             col  = col - 1;
242 #else
243             col = baij->colmap[in[j]/bs] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
246               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
247               col =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
249               B = baij->B;
250               b = (Mat_SeqBAIJ*)(B)->data;
251               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
252               ba=b->a;
253             } else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
256           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
257           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
258         }
259       }
260     } else {
261       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
262       if (!baij->donotstash) {
263         if (roworiented) {
264           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
265         } else {
266           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
267         }
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
277 {
278   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
279   const PetscScalar *value;
280   MatScalar         *barray=baij->barray;
281   PetscBool         roworiented = baij->roworiented;
282   PetscErrorCode    ierr;
283   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
284   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
285   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
286 
287   PetscFunctionBegin;
288   if(!barray) {
289     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
290     baij->barray = barray;
291   }
292 
293   if (roworiented) {
294     stepval = (n-1)*bs;
295   } else {
296     stepval = (m-1)*bs;
297   }
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If NumCol = 1 then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315 	    value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) {
319               barray[jj]  = value[jj];
320             }
321             barray += bs;
322           }
323           barray -= bs2;
324         }
325 
326         if (in[j] >= cstart && in[j] < cend){
327           col  = in[j] - cstart;
328           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
329         }
330         else if (in[j] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
333 #endif
334         else {
335           if (mat->was_assembled) {
336             if (!baij->colmap) {
337               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
338             }
339 
340 #if defined(PETSC_USE_DEBUG)
341 #if defined (PETSC_USE_CTABLE)
342             { PetscInt data;
343               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
344               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345             }
346 #else
347             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
348 #endif
349 #endif
350 #if defined (PETSC_USE_CTABLE)
351 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
352             col  = (col - 1)/bs;
353 #else
354             col = (baij->colmap[in[j]] - 1)/bs;
355 #endif
356             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
357               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358               col =  in[j];
359             }
360           }
361           else col = in[j];
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364       }
365     } else {
366       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
367       if (!baij->donotstash) {
368         if (roworiented) {
369           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         } else {
371           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
372         }
373       }
374     }
375   }
376   PetscFunctionReturn(0);
377 }
378 
379 #define HASH_KEY 0.6180339887
380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 #undef __FUNCT__
384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
386 {
387   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
388   PetscBool      roworiented = baij->roworiented;
389   PetscErrorCode ierr;
390   PetscInt       i,j,row,col;
391   PetscInt       rstart_orig=mat->rmap->rstart;
392   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
393   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
394   PetscReal      tmp;
395   MatScalar      **HD = baij->hd,value;
396 #if defined(PETSC_USE_DEBUG)
397   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
398 #endif
399 
400   PetscFunctionBegin;
401   if (v) PetscValidScalarPointer(v,6);
402   for (i=0; i<m; i++) {
403 #if defined(PETSC_USE_DEBUG)
404     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
405     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
406 #endif
407       row = im[i];
408     if (row >= rstart_orig && row < rend_orig) {
409       for (j=0; j<n; j++) {
410         col = in[j];
411         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412         /* Look up PetscInto the Hash Table */
413         key = (row/bs)*Nbs+(col/bs)+1;
414         h1  = HASH(size,key,tmp);
415 
416 
417         idx = h1;
418 #if defined(PETSC_USE_DEBUG)
419         insert_ct++;
420         total_ct++;
421         if (HT[idx] != key) {
422           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
423           if (idx == size) {
424             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
425             if (idx == h1) {
426               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
427             }
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
435             if (idx == h1) {
436               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
437             }
438           }
439         }
440 #endif
441         /* A HASH table entry is found, so insert the values at the correct address */
442         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
443         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
444       }
445     } else {
446       if (!baij->donotstash) {
447         if (roworiented) {
448           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
449         } else {
450           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
451         }
452       }
453     }
454   }
455 #if defined(PETSC_USE_DEBUG)
456   baij->ht_total_ct = total_ct;
457   baij->ht_insert_ct = insert_ct;
458 #endif
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
467   PetscBool         roworiented = baij->roworiented;
468   PetscErrorCode    ierr;
469   PetscInt          i,j,ii,jj,row,col;
470   PetscInt          rstart=baij->rstartbs;
471   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
472   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
473   PetscReal         tmp;
474   MatScalar         **HD = baij->hd,*baij_a;
475   const PetscScalar *v_t,*value;
476 #if defined(PETSC_USE_DEBUG)
477   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
478 #endif
479 
480   PetscFunctionBegin;
481 
482   if (roworiented) {
483     stepval = (n-1)*bs;
484   } else {
485     stepval = (m-1)*bs;
486   }
487   for (i=0; i<m; i++) {
488 #if defined(PETSC_USE_DEBUG)
489     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
490     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
491 #endif
492     row   = im[i];
493     v_t   = v + i*nbs2;
494     if (row >= rstart && row < rend) {
495       for (j=0; j<n; j++) {
496         col = in[j];
497 
498         /* Look up into the Hash Table */
499         key = row*Nbs+col+1;
500         h1  = HASH(size,key,tmp);
501 
502         idx = h1;
503 #if defined(PETSC_USE_DEBUG)
504         total_ct++;
505         insert_ct++;
506        if (HT[idx] != key) {
507           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
508           if (idx == size) {
509             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
510             if (idx == h1) {
511               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512             }
513           }
514         }
515 #else
516         if (HT[idx] != key) {
517           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
518           if (idx == size) {
519             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
520             if (idx == h1) {
521               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
522             }
523           }
524         }
525 #endif
526         baij_a = HD[idx];
527         if (roworiented) {
528           /*value = v + i*(stepval+bs)*bs + j*bs;*/
529           /* value = v + (i*(stepval+bs)+j)*bs; */
530           value = v_t;
531           v_t  += bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval) {
534               for (jj=ii; jj<bs2; jj+=bs) {
535                 baij_a[jj]  += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval) {
540               for (jj=ii; jj<bs2; jj+=bs) {
541                 baij_a[jj]  = *value++;
542               }
543             }
544           }
545         } else {
546           value = v + j*(stepval+bs)*bs + i*bs;
547           if (addv == ADD_VALUES) {
548             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
549               for (jj=0; jj<bs; jj++) {
550                 baij_a[jj]  += *value++;
551               }
552             }
553           } else {
554             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
555               for (jj=0; jj<bs; jj++) {
556                 baij_a[jj]  = *value++;
557               }
558             }
559           }
560         }
561       }
562     } else {
563       if (!baij->donotstash) {
564         if (roworiented) {
565           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         } else {
567           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
568         }
569       }
570     }
571   }
572 #if defined(PETSC_USE_DEBUG)
573   baij->ht_total_ct = total_ct;
574   baij->ht_insert_ct = insert_ct;
575 #endif
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatGetValues_MPIBAIJ"
581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
582 {
583   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
584   PetscErrorCode ierr;
585   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
586   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
587 
588   PetscFunctionBegin;
589   for (i=0; i<m; i++) {
590     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
591     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
592     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
593       row = idxm[i] - bsrstart;
594       for (j=0; j<n; j++) {
595         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
596         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
597         if (idxn[j] >= bscstart && idxn[j] < bscend){
598           col = idxn[j] - bscstart;
599           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
600         } else {
601           if (!baij->colmap) {
602             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
603           }
604 #if defined (PETSC_USE_CTABLE)
605           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
606           data --;
607 #else
608           data = baij->colmap[idxn[j]/bs]-1;
609 #endif
610           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
611           else {
612             col  = data + idxn[j]%bs;
613             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
614           }
615         }
616       }
617     } else {
618       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
619     }
620   }
621  PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatNorm_MPIBAIJ"
626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
627 {
628   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
629   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
630   PetscErrorCode ierr;
631   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
632   PetscReal      sum = 0.0;
633   MatScalar      *v;
634 
635   PetscFunctionBegin;
636   if (baij->size == 1) {
637     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
638   } else {
639     if (type == NORM_FROBENIUS) {
640       v = amat->a;
641       nz = amat->nz*bs2;
642       for (i=0; i<nz; i++) {
643 #if defined(PETSC_USE_COMPLEX)
644         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
645 #else
646         sum += (*v)*(*v); v++;
647 #endif
648       }
649       v = bmat->a;
650       nz = bmat->nz*bs2;
651       for (i=0; i<nz; i++) {
652 #if defined(PETSC_USE_COMPLEX)
653         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
654 #else
655         sum += (*v)*(*v); v++;
656 #endif
657       }
658       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
659       *nrm = sqrt(*nrm);
660     } else if (type == NORM_1) { /* max column sum */
661       PetscReal *tmp,*tmp2;
662       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
663       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
664       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
665       v = amat->a; jj = amat->j;
666       for (i=0; i<amat->nz; i++) {
667         for (j=0; j<bs; j++){
668           col = bs*(cstart + *jj) + j; /* column index */
669           for (row=0; row<bs; row++){
670             tmp[col] += PetscAbsScalar(*v);  v++;
671           }
672         }
673         jj++;
674       }
675       v = bmat->a; jj = bmat->j;
676       for (i=0; i<bmat->nz; i++) {
677         for (j=0; j<bs; j++){
678           col = bs*garray[*jj] + j;
679           for (row=0; row<bs; row++){
680             tmp[col] += PetscAbsScalar(*v); v++;
681           }
682         }
683         jj++;
684       }
685       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
686       *nrm = 0.0;
687       for (j=0; j<mat->cmap->N; j++) {
688         if (tmp2[j] > *nrm) *nrm = tmp2[j];
689       }
690       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
691     } else if (type == NORM_INFINITY) { /* max row sum */
692       PetscReal *sums;
693       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
694       sum = 0.0;
695       for (j=0; j<amat->mbs; j++) {
696         for (row=0; row<bs; row++) sums[row] = 0.0;
697         v = amat->a + bs2*amat->i[j];
698         nz = amat->i[j+1]-amat->i[j];
699         for (i=0; i<nz; i++) {
700           for (col=0; col<bs; col++){
701             for (row=0; row<bs; row++){
702               sums[row] += PetscAbsScalar(*v); v++;
703             }
704           }
705         }
706         v = bmat->a + bs2*bmat->i[j];
707         nz = bmat->i[j+1]-bmat->i[j];
708         for (i=0; i<nz; i++) {
709           for (col=0; col<bs; col++){
710             for (row=0; row<bs; row++){
711               sums[row] += PetscAbsScalar(*v); v++;
712             }
713           }
714         }
715         for (row=0; row<bs; row++){
716           if (sums[row] > sum) sum = sums[row];
717         }
718       }
719       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
720       ierr = PetscFree(sums);CHKERRQ(ierr);
721     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 /*
727   Creates the hash table, and sets the table
728   This table is created only once.
729   If new entried need to be added to the matrix
730   then the hash table has to be destroyed and
731   recreated.
732 */
733 #undef __FUNCT__
734 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
735 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
736 {
737   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
738   Mat            A = baij->A,B=baij->B;
739   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
740   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
741   PetscErrorCode ierr;
742   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
743   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
744   PetscInt       *HT,key;
745   MatScalar      **HD;
746   PetscReal      tmp;
747 #if defined(PETSC_USE_INFO)
748   PetscInt       ct=0,max=0;
749 #endif
750 
751   PetscFunctionBegin;
752   if (baij->ht) PetscFunctionReturn(0);
753 
754   baij->ht_size = (PetscInt)(factor*nz);
755   ht_size       = baij->ht_size;
756 
757   /* Allocate Memory for Hash Table */
758   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
759   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
761   HD   = baij->hd;
762   HT   = baij->ht;
763 
764   /* Loop Over A */
765   for (i=0; i<a->mbs; i++) {
766     for (j=ai[i]; j<ai[i+1]; j++) {
767       row = i+rstart;
768       col = aj[j]+cstart;
769 
770       key = row*Nbs + col + 1;
771       h1  = HASH(ht_size,key,tmp);
772       for (k=0; k<ht_size; k++){
773         if (!HT[(h1+k)%ht_size]) {
774           HT[(h1+k)%ht_size] = key;
775           HD[(h1+k)%ht_size] = a->a + j*bs2;
776           break;
777 #if defined(PETSC_USE_INFO)
778         } else {
779           ct++;
780 #endif
781         }
782       }
783 #if defined(PETSC_USE_INFO)
784       if (k> max) max = k;
785 #endif
786     }
787   }
788   /* Loop Over B */
789   for (i=0; i<b->mbs; i++) {
790     for (j=bi[i]; j<bi[i+1]; j++) {
791       row = i+rstart;
792       col = garray[bj[j]];
793       key = row*Nbs + col + 1;
794       h1  = HASH(ht_size,key,tmp);
795       for (k=0; k<ht_size; k++){
796         if (!HT[(h1+k)%ht_size]) {
797           HT[(h1+k)%ht_size] = key;
798           HD[(h1+k)%ht_size] = b->a + j*bs2;
799           break;
800 #if defined(PETSC_USE_INFO)
801         } else {
802           ct++;
803 #endif
804         }
805       }
806 #if defined(PETSC_USE_INFO)
807       if (k> max) max = k;
808 #endif
809     }
810   }
811 
812   /* Print Summary */
813 #if defined(PETSC_USE_INFO)
814   for (i=0,j=0; i<ht_size; i++) {
815     if (HT[i]) {j++;}
816   }
817   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
818 #endif
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
824 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
825 {
826   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
827   PetscErrorCode ierr;
828   PetscInt       nstash,reallocs;
829   InsertMode     addv;
830 
831   PetscFunctionBegin;
832   if (baij->donotstash || mat->nooffprocentries) {
833     PetscFunctionReturn(0);
834   }
835 
836   /* make sure all processors are either in INSERTMODE or ADDMODE */
837   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
838   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
839   mat->insertmode = addv; /* in case this processor had no cache */
840 
841   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
842   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
843   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
844   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
845   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
846   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
852 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
853 {
854   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
855   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
856   PetscErrorCode ierr;
857   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
858   PetscInt       *row,*col;
859   PetscBool      r1,r2,r3,other_disassembled;
860   MatScalar      *val;
861   InsertMode     addv = mat->insertmode;
862   PetscMPIInt    n;
863 
864   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
865   PetscFunctionBegin;
866   if (!baij->donotstash && !mat->nooffprocentries) {
867     while (1) {
868       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
869       if (!flg) break;
870 
871       for (i=0; i<n;) {
872         /* Now identify the consecutive vals belonging to the same row */
873         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
874         if (j < n) ncols = j-i;
875         else       ncols = n-i;
876         /* Now assemble all these values with a single function call */
877         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
878         i = j;
879       }
880     }
881     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
882     /* Now process the block-stash. Since the values are stashed column-oriented,
883        set the roworiented flag to column oriented, and after MatSetValues()
884        restore the original flags */
885     r1 = baij->roworiented;
886     r2 = a->roworiented;
887     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
888     baij->roworiented = PETSC_FALSE;
889     a->roworiented    = PETSC_FALSE;
890     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
891     while (1) {
892       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
893       if (!flg) break;
894 
895       for (i=0; i<n;) {
896         /* Now identify the consecutive vals belonging to the same row */
897         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
898         if (j < n) ncols = j-i;
899         else       ncols = n-i;
900         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
901         i = j;
902       }
903     }
904     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
905     baij->roworiented = r1;
906     a->roworiented    = r2;
907     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
908   }
909 
910   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
911   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
912 
913   /* determine if any processor has disassembled, if so we must
914      also disassemble ourselfs, in order that we may reassemble. */
915   /*
916      if nonzero structure of submatrix B cannot change then we know that
917      no processor disassembled thus we can skip this stuff
918   */
919   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
920     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
921     if (mat->was_assembled && !other_disassembled) {
922       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
923     }
924   }
925 
926   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
927     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
928   }
929   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
930   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
932 
933 #if defined(PETSC_USE_INFO)
934   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
935     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
936     baij->ht_total_ct  = 0;
937     baij->ht_insert_ct = 0;
938   }
939 #endif
940   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
941     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
942     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
943     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
944   }
945 
946   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
947   baij->rowvalues = 0;
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
953 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
954 {
955   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
956   PetscErrorCode    ierr;
957   PetscMPIInt       size = baij->size,rank = baij->rank;
958   PetscInt          bs = mat->rmap->bs;
959   PetscBool         iascii,isdraw;
960   PetscViewer       sviewer;
961   PetscViewerFormat format;
962 
963   PetscFunctionBegin;
964   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
966   if (iascii) {
967     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
968     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
969       MatInfo info;
970       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
971       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
972       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
973              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
974       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
975       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
976       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
977       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
978       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
979       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
980       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
981       PetscFunctionReturn(0);
982     } else if (format == PETSC_VIEWER_ASCII_INFO) {
983       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
984       PetscFunctionReturn(0);
985     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
986       PetscFunctionReturn(0);
987     }
988   }
989 
990   if (isdraw) {
991     PetscDraw       draw;
992     PetscBool  isnull;
993     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
994     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
995   }
996 
997   if (size == 1) {
998     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
999     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1000   } else {
1001     /* assemble the entire matrix onto first processor. */
1002     Mat         A;
1003     Mat_SeqBAIJ *Aloc;
1004     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1005     MatScalar   *a;
1006 
1007     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1008     /* Perhaps this should be the type of mat? */
1009     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1010     if (!rank) {
1011       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1012     } else {
1013       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1014     }
1015     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1016     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1017     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1018 
1019     /* copy over the A part */
1020     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1021     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1022     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1023 
1024     for (i=0; i<mbs; i++) {
1025       rvals[0] = bs*(baij->rstartbs + i);
1026       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1027       for (j=ai[i]; j<ai[i+1]; j++) {
1028         col = (baij->cstartbs+aj[j])*bs;
1029         for (k=0; k<bs; k++) {
1030           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1031           col++; a += bs;
1032         }
1033       }
1034     }
1035     /* copy over the B part */
1036     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1037     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1038     for (i=0; i<mbs; i++) {
1039       rvals[0] = bs*(baij->rstartbs + i);
1040       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1041       for (j=ai[i]; j<ai[i+1]; j++) {
1042         col = baij->garray[aj[j]]*bs;
1043         for (k=0; k<bs; k++) {
1044           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1045           col++; a += bs;
1046         }
1047       }
1048     }
1049     ierr = PetscFree(rvals);CHKERRQ(ierr);
1050     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1051     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     /*
1053        Everyone has to call to draw the matrix since the graphics waits are
1054        synchronized across all processors that share the PetscDraw object
1055     */
1056     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1057     if (!rank) {
1058       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1059     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1060       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1061       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1062     }
1063     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1064     ierr = MatDestroy(A);CHKERRQ(ierr);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1071 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1072 {
1073   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1074   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1075   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1076   PetscErrorCode ierr;
1077   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1078   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1079   int            fd;
1080   PetscScalar    *column_values;
1081   FILE           *file;
1082   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1083   PetscInt       message_count,flowcontrolcount;
1084 
1085   PetscFunctionBegin;
1086   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1087   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1088   nz   = bs2*(A->nz + B->nz);
1089   rlen = mat->rmap->n;
1090   if (!rank) {
1091     header[0] = MAT_FILE_CLASSID;
1092     header[1] = mat->rmap->N;
1093     header[2] = mat->cmap->N;
1094     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1095     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1096     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1097     /* get largest number of rows any processor has */
1098     range = mat->rmap->range;
1099     for (i=1; i<size; i++) {
1100       rlen = PetscMax(rlen,range[i+1] - range[i]);
1101     }
1102   } else {
1103     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1104   }
1105 
1106   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1107   /* compute lengths of each row  */
1108   for (i=0; i<a->mbs; i++) {
1109     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1110   }
1111   /* store the row lengths to the file */
1112   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1113   if (!rank) {
1114     MPI_Status status;
1115     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1116     rlen  = (range[1] - range[0])/bs;
1117     for (i=0; i<rlen; i++) {
1118       for (j=0; j<bs; j++) {
1119         row_lens[i*bs+j] = bs*crow_lens[i];
1120       }
1121     }
1122     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1123     for (i=1; i<size; i++) {
1124       rlen = (range[i+1] - range[i])/bs;
1125       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1126       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1127       for (k=0; k<rlen; k++) {
1128 	for (j=0; j<bs; j++) {
1129 	  row_lens[k*bs+j] = bs*crow_lens[k];
1130 	}
1131       }
1132       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1133     }
1134     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1135     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1136   } else {
1137     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1138     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1139     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1140   }
1141   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1142 
1143   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1144      information needed to make it for each row from a block row. This does require more communication but still not more than
1145      the communication needed for the nonzero values  */
1146   nzmax = nz; /*  space a largest processor needs */
1147   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1148   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1149   cnt  = 0;
1150   for (i=0; i<a->mbs; i++) {
1151     pcnt = cnt;
1152     for (j=B->i[i]; j<B->i[i+1]; j++) {
1153       if ( (col = garray[B->j[j]]) > cstart) break;
1154       for (l=0; l<bs; l++) {
1155 	column_indices[cnt++] = bs*col+l;
1156       }
1157     }
1158     for (k=A->i[i]; k<A->i[i+1]; k++) {
1159       for (l=0; l<bs; l++) {
1160         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1161       }
1162     }
1163     for (; j<B->i[i+1]; j++) {
1164       for (l=0; l<bs; l++) {
1165         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1166       }
1167     }
1168     len = cnt - pcnt;
1169     for (k=1; k<bs; k++) {
1170       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1171       cnt += len;
1172     }
1173   }
1174   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1175 
1176   /* store the columns to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     MPI_Status status;
1180     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1181     for (i=1; i<size; i++) {
1182       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1183       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1184       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1186     }
1187     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1188   } else {
1189     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1190     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1191     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1192     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1193   }
1194   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1195 
1196   /* load up the numerical values */
1197   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1198   cnt = 0;
1199   for (i=0; i<a->mbs; i++) {
1200     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1201     for (j=B->i[i]; j<B->i[i+1]; j++) {
1202       if ( garray[B->j[j]] > cstart) break;
1203       for (l=0; l<bs; l++) {
1204         for (ll=0; ll<bs; ll++) {
1205 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1206         }
1207       }
1208       cnt += bs;
1209     }
1210     for (k=A->i[i]; k<A->i[i+1]; k++) {
1211       for (l=0; l<bs; l++) {
1212         for (ll=0; ll<bs; ll++) {
1213           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1214         }
1215       }
1216       cnt += bs;
1217     }
1218     for (; j<B->i[i+1]; j++) {
1219       for (l=0; l<bs; l++) {
1220         for (ll=0; ll<bs; ll++) {
1221 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1222         }
1223       }
1224       cnt += bs;
1225     }
1226     cnt += (bs-1)*rlen;
1227   }
1228   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1229 
1230   /* store the column values to the file */
1231   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1232   if (!rank) {
1233     MPI_Status status;
1234     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1235     for (i=1; i<size; i++) {
1236       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1237       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1238       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1239       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1240     }
1241     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1242   } else {
1243     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1244     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1245     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1246     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1247   }
1248   ierr = PetscFree(column_values);CHKERRQ(ierr);
1249 
1250   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1251   if (file) {
1252     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatView_MPIBAIJ"
1259 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1260 {
1261   PetscErrorCode ierr;
1262   PetscBool      iascii,isdraw,issocket,isbinary;
1263 
1264   PetscFunctionBegin;
1265   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1266   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1269   if (iascii || isdraw || issocket) {
1270     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1271   } else if (isbinary) {
1272     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1273   } else {
1274     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1281 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1282 {
1283   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1284   PetscErrorCode ierr;
1285 
1286   PetscFunctionBegin;
1287 #if defined(PETSC_USE_LOG)
1288   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1289 #endif
1290   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1291   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1292   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1293   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1294 #if defined (PETSC_USE_CTABLE)
1295   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1296 #else
1297   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1298 #endif
1299   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1300   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1301   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1302   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1303   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1304   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1305   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1306   ierr = PetscFree(baij);CHKERRQ(ierr);
1307 
1308   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1309   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatMult_MPIBAIJ"
1321 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1322 {
1323   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325   PetscInt       nt;
1326 
1327   PetscFunctionBegin;
1328   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1329   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1330   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1331   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1332   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1334   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1341 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1342 {
1343   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1348   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1349   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1350   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1356 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1357 {
1358   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360   PetscBool      merged;
1361 
1362   PetscFunctionBegin;
1363   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1364   /* do nondiagonal part */
1365   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1366   if (!merged) {
1367     /* send it on its way */
1368     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1369     /* do local part */
1370     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1371     /* receive remote parts: note this assumes the values are not actually */
1372     /* inserted in yy until the next line */
1373     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1374   } else {
1375     /* do local part */
1376     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1377     /* send it on its way */
1378     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1379     /* values actually were received in the Begin() but we need to call this nop */
1380     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1387 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1388 {
1389   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1390   PetscErrorCode ierr;
1391 
1392   PetscFunctionBegin;
1393   /* do nondiagonal part */
1394   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1395   /* send it on its way */
1396   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1397   /* do local part */
1398   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1399   /* receive remote parts: note this assumes the values are not actually */
1400   /* inserted in yy until the next line, which is true for my implementation*/
1401   /* but is not perhaps always true. */
1402   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /*
1407   This only works correctly for square matrices where the subblock A->A is the
1408    diagonal block
1409 */
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1412 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1413 {
1414   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1419   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatScale_MPIBAIJ"
1425 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1426 {
1427   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1428   PetscErrorCode ierr;
1429 
1430   PetscFunctionBegin;
1431   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1432   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1438 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1439 {
1440   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1441   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1442   PetscErrorCode ierr;
1443   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1444   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1445   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1446 
1447   PetscFunctionBegin;
1448   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1449   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1450   mat->getrowactive = PETSC_TRUE;
1451 
1452   if (!mat->rowvalues && (idx || v)) {
1453     /*
1454         allocate enough space to hold information from the longest row.
1455     */
1456     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1457     PetscInt     max = 1,mbs = mat->mbs,tmp;
1458     for (i=0; i<mbs; i++) {
1459       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1460       if (max < tmp) { max = tmp; }
1461     }
1462     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1463   }
1464   lrow = row - brstart;
1465 
1466   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1467   if (!v)   {pvA = 0; pvB = 0;}
1468   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1469   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1470   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1471   nztot = nzA + nzB;
1472 
1473   cmap  = mat->garray;
1474   if (v  || idx) {
1475     if (nztot) {
1476       /* Sort by increasing column numbers, assuming A and B already sorted */
1477       PetscInt imark = -1;
1478       if (v) {
1479         *v = v_p = mat->rowvalues;
1480         for (i=0; i<nzB; i++) {
1481           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1482           else break;
1483         }
1484         imark = i;
1485         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1486         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1487       }
1488       if (idx) {
1489         *idx = idx_p = mat->rowindices;
1490         if (imark > -1) {
1491           for (i=0; i<imark; i++) {
1492             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1493           }
1494         } else {
1495           for (i=0; i<nzB; i++) {
1496             if (cmap[cworkB[i]/bs] < cstart)
1497               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1498             else break;
1499           }
1500           imark = i;
1501         }
1502         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1503         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1504       }
1505     } else {
1506       if (idx) *idx = 0;
1507       if (v)   *v   = 0;
1508     }
1509   }
1510   *nz = nztot;
1511   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1512   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1518 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1519 {
1520   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1521 
1522   PetscFunctionBegin;
1523   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1524   baij->getrowactive = PETSC_FALSE;
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1530 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1531 {
1532   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1537   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1543 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1544 {
1545   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1546   Mat            A = a->A,B = a->B;
1547   PetscErrorCode ierr;
1548   PetscReal      isend[5],irecv[5];
1549 
1550   PetscFunctionBegin;
1551   info->block_size     = (PetscReal)matin->rmap->bs;
1552   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1553   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1554   isend[3] = info->memory;  isend[4] = info->mallocs;
1555   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1556   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1557   isend[3] += info->memory;  isend[4] += info->mallocs;
1558   if (flag == MAT_LOCAL) {
1559     info->nz_used      = isend[0];
1560     info->nz_allocated = isend[1];
1561     info->nz_unneeded  = isend[2];
1562     info->memory       = isend[3];
1563     info->mallocs      = isend[4];
1564   } else if (flag == MAT_GLOBAL_MAX) {
1565     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1566     info->nz_used      = irecv[0];
1567     info->nz_allocated = irecv[1];
1568     info->nz_unneeded  = irecv[2];
1569     info->memory       = irecv[3];
1570     info->mallocs      = irecv[4];
1571   } else if (flag == MAT_GLOBAL_SUM) {
1572     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1573     info->nz_used      = irecv[0];
1574     info->nz_allocated = irecv[1];
1575     info->nz_unneeded  = irecv[2];
1576     info->memory       = irecv[3];
1577     info->mallocs      = irecv[4];
1578   } else {
1579     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1580   }
1581   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1582   info->fill_ratio_needed = 0;
1583   info->factor_mallocs    = 0;
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1589 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1590 {
1591   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   switch (op) {
1596   case MAT_NEW_NONZERO_LOCATIONS:
1597   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1598   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1599   case MAT_KEEP_NONZERO_PATTERN:
1600   case MAT_NEW_NONZERO_LOCATION_ERR:
1601     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1602     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1603     break;
1604   case MAT_ROW_ORIENTED:
1605     a->roworiented = flg;
1606     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1607     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1608     break;
1609   case MAT_NEW_DIAGONALS:
1610     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1611     break;
1612   case MAT_IGNORE_OFF_PROC_ENTRIES:
1613     a->donotstash = flg;
1614     break;
1615   case MAT_USE_HASH_TABLE:
1616     a->ht_flag = flg;
1617     break;
1618   case MAT_SYMMETRIC:
1619   case MAT_STRUCTURALLY_SYMMETRIC:
1620   case MAT_HERMITIAN:
1621   case MAT_SYMMETRY_ETERNAL:
1622     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1623     break;
1624   default:
1625     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1632 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1633 {
1634   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1635   Mat_SeqBAIJ    *Aloc;
1636   Mat            B;
1637   PetscErrorCode ierr;
1638   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1639   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1640   MatScalar      *a;
1641 
1642   PetscFunctionBegin;
1643   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1644   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1645     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1646     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1647     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1648     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1649   } else {
1650     B = *matout;
1651   }
1652 
1653   /* copy over the A part */
1654   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1655   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1656   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1657 
1658   for (i=0; i<mbs; i++) {
1659     rvals[0] = bs*(baij->rstartbs + i);
1660     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1661     for (j=ai[i]; j<ai[i+1]; j++) {
1662       col = (baij->cstartbs+aj[j])*bs;
1663       for (k=0; k<bs; k++) {
1664         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1665         col++; a += bs;
1666       }
1667     }
1668   }
1669   /* copy over the B part */
1670   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1671   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1672   for (i=0; i<mbs; i++) {
1673     rvals[0] = bs*(baij->rstartbs + i);
1674     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1675     for (j=ai[i]; j<ai[i+1]; j++) {
1676       col = baij->garray[aj[j]]*bs;
1677       for (k=0; k<bs; k++) {
1678         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1679         col++; a += bs;
1680       }
1681     }
1682   }
1683   ierr = PetscFree(rvals);CHKERRQ(ierr);
1684   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686 
1687   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1688     *matout = B;
1689   } else {
1690     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1691   }
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1697 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1698 {
1699   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1700   Mat            a = baij->A,b = baij->B;
1701   PetscErrorCode ierr;
1702   PetscInt       s1,s2,s3;
1703 
1704   PetscFunctionBegin;
1705   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1706   if (rr) {
1707     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1708     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1709     /* Overlap communication with computation. */
1710     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1711   }
1712   if (ll) {
1713     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1714     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1715     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1716   }
1717   /* scale  the diagonal block */
1718   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1719 
1720   if (rr) {
1721     /* Do a scatter end and then right scale the off-diagonal block */
1722     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1723     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1724   }
1725 
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1731 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1732 {
1733   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1734   PetscErrorCode    ierr;
1735   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1736   PetscInt          i,*owners = A->rmap->range;
1737   PetscInt          *nprocs,j,idx,nsends,row;
1738   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1739   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1740   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1741   MPI_Comm          comm = ((PetscObject)A)->comm;
1742   MPI_Request       *send_waits,*recv_waits;
1743   MPI_Status        recv_status,*send_status;
1744   const PetscScalar *xx;
1745   PetscScalar       *bb;
1746 #if defined(PETSC_DEBUG)
1747   PetscBool         found = PETSC_FALSE;
1748 #endif
1749 
1750   PetscFunctionBegin;
1751   /*  first count number of contributors to each processor */
1752   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1753   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1755   j = 0;
1756   for (i=0; i<N; i++) {
1757     if (lastidx > (idx = rows[i])) j = 0;
1758     lastidx = idx;
1759     for (; j<size; j++) {
1760       if (idx >= owners[j] && idx < owners[j+1]) {
1761         nprocs[2*j]++;
1762         nprocs[2*j+1] = 1;
1763         owner[i] = j;
1764 #if defined(PETSC_DEBUG)
1765         found = PETSC_TRUE;
1766 #endif
1767         break;
1768       }
1769     }
1770 #if defined(PETSC_DEBUG)
1771     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1772     found = PETSC_FALSE;
1773 #endif
1774   }
1775   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1776 
1777   if (A->nooffproczerorows) {
1778     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1779     nrecvs = nsends;
1780     nmax   = N;
1781   } else {
1782     /* inform other processors of number of messages and max length*/
1783     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1784   }
1785 
1786   /* post receives:   */
1787   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1788   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1789   for (i=0; i<nrecvs; i++) {
1790     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1791   }
1792 
1793   /* do sends:
1794      1) starts[i] gives the starting index in svalues for stuff going to
1795      the ith processor
1796   */
1797   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1798   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1799   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1800   starts[0]  = 0;
1801   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1802   for (i=0; i<N; i++) {
1803     svalues[starts[owner[i]]++] = rows[i];
1804   }
1805 
1806   starts[0] = 0;
1807   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1808   count = 0;
1809   for (i=0; i<size; i++) {
1810     if (nprocs[2*i+1]) {
1811       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1812     }
1813   }
1814   ierr = PetscFree(starts);CHKERRQ(ierr);
1815 
1816   base = owners[rank];
1817 
1818   /*  wait on receives */
1819   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1820   count  = nrecvs;
1821   slen = 0;
1822   while (count) {
1823     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1824     /* unpack receives into our local space */
1825     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1826     source[imdex]  = recv_status.MPI_SOURCE;
1827     lens[imdex]    = n;
1828     slen          += n;
1829     count--;
1830   }
1831   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1832 
1833   /* move the data into the send scatter */
1834   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1835   count = 0;
1836   for (i=0; i<nrecvs; i++) {
1837     values = rvalues + i*nmax;
1838     for (j=0; j<lens[i]; j++) {
1839       lrows[count++] = values[j] - base;
1840     }
1841   }
1842   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1843   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1844   ierr = PetscFree(owner);CHKERRQ(ierr);
1845   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1846 
1847   /* fix right hand side if needed */
1848   if (x && b) {
1849     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1850     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1851     for (i=0; i<slen; i++) {
1852       bb[lrows[i]] = diag*xx[lrows[i]];
1853     }
1854     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1856   }
1857 
1858   /* actually zap the local rows */
1859   /*
1860         Zero the required rows. If the "diagonal block" of the matrix
1861      is square and the user wishes to set the diagonal we use separate
1862      code so that MatSetValues() is not called for each diagonal allocating
1863      new memory, thus calling lots of mallocs and slowing things down.
1864 
1865   */
1866   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1867   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1868   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1869     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1870   } else if (diag != 0.0) {
1871     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1872     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1873        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1874     for (i=0; i<slen; i++) {
1875       row  = lrows[i] + rstart_bs;
1876       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1877     }
1878     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880   } else {
1881     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1882   }
1883 
1884   ierr = PetscFree(lrows);CHKERRQ(ierr);
1885 
1886   /* wait on sends */
1887   if (nsends) {
1888     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1889     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1890     ierr = PetscFree(send_status);CHKERRQ(ierr);
1891   }
1892   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1893   ierr = PetscFree(svalues);CHKERRQ(ierr);
1894 
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1900 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1901 {
1902   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatEqual_MPIBAIJ"
1914 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1915 {
1916   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1917   Mat            a,b,c,d;
1918   PetscBool      flg;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   a = matA->A; b = matA->B;
1923   c = matB->A; d = matB->B;
1924 
1925   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1926   if (flg) {
1927     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1928   }
1929   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatCopy_MPIBAIJ"
1935 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1936 {
1937   PetscErrorCode ierr;
1938   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1939   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1940 
1941   PetscFunctionBegin;
1942   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1943   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1944     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1945   } else {
1946     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1947     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1948   }
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1954 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1955 {
1956   PetscErrorCode ierr;
1957 
1958   PetscFunctionBegin;
1959   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1965 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1966 {
1967   PetscErrorCode ierr;
1968   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1969   PetscBLASInt   bnz,one=1;
1970   Mat_SeqBAIJ    *x,*y;
1971 
1972   PetscFunctionBegin;
1973   if (str == SAME_NONZERO_PATTERN) {
1974     PetscScalar alpha = a;
1975     x = (Mat_SeqBAIJ *)xx->A->data;
1976     y = (Mat_SeqBAIJ *)yy->A->data;
1977     bnz = PetscBLASIntCast(x->nz);
1978     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1979     x = (Mat_SeqBAIJ *)xx->B->data;
1980     y = (Mat_SeqBAIJ *)yy->B->data;
1981     bnz = PetscBLASIntCast(x->nz);
1982     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1983   } else {
1984     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1985   }
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1991 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1992 {
1993   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1994   PetscInt rbs,cbs;
1995   PetscErrorCode ierr;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1999   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2000   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2001   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2002   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2003   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2009 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2010 {
2011   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2016   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2022 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2023 {
2024   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2025   PetscErrorCode ierr;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2029   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2035 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2036 {
2037   PetscErrorCode ierr;
2038   IS             iscol_local;
2039   PetscInt       csize;
2040 
2041   PetscFunctionBegin;
2042   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2043   if (call == MAT_REUSE_MATRIX) {
2044     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2045     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2046   } else {
2047     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2048   }
2049   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2050   if (call == MAT_INITIAL_MATRIX) {
2051     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2052     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2053   }
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2059 /*
2060     Not great since it makes two copies of the submatrix, first an SeqBAIJ
2061   in local and then by concatenating the local matrices the end result.
2062   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2063 */
2064 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2065 {
2066   PetscErrorCode ierr;
2067   PetscMPIInt    rank,size;
2068   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2069   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2070   Mat            *local,M,Mreuse;
2071   MatScalar      *vwork,*aa;
2072   MPI_Comm       comm = ((PetscObject)mat)->comm;
2073   Mat_SeqBAIJ    *aij;
2074 
2075 
2076   PetscFunctionBegin;
2077   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2078   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2079 
2080   if (call ==  MAT_REUSE_MATRIX) {
2081     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2082     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2083     local = &Mreuse;
2084     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2085   } else {
2086     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2087     Mreuse = *local;
2088     ierr   = PetscFree(local);CHKERRQ(ierr);
2089   }
2090 
2091   /*
2092       m - number of local rows
2093       n - number of columns (same on all processors)
2094       rstart - first row in new global matrix generated
2095   */
2096   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2097   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2098   m    = m/bs;
2099   n    = n/bs;
2100 
2101   if (call == MAT_INITIAL_MATRIX) {
2102     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2103     ii  = aij->i;
2104     jj  = aij->j;
2105 
2106     /*
2107         Determine the number of non-zeros in the diagonal and off-diagonal
2108         portions of the matrix in order to do correct preallocation
2109     */
2110 
2111     /* first get start and end of "diagonal" columns */
2112     if (csize == PETSC_DECIDE) {
2113       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2114       if (mglobal == n*bs) { /* square matrix */
2115 	nlocal = m;
2116       } else {
2117         nlocal = n/size + ((n % size) > rank);
2118       }
2119     } else {
2120       nlocal = csize/bs;
2121     }
2122     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2123     rstart = rend - nlocal;
2124     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2125 
2126     /* next, compute all the lengths */
2127     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2128     olens = dlens + m;
2129     for (i=0; i<m; i++) {
2130       jend = ii[i+1] - ii[i];
2131       olen = 0;
2132       dlen = 0;
2133       for (j=0; j<jend; j++) {
2134         if (*jj < rstart || *jj >= rend) olen++;
2135         else dlen++;
2136         jj++;
2137       }
2138       olens[i] = olen;
2139       dlens[i] = dlen;
2140     }
2141     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2142     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2143     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2144     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2145     ierr = PetscFree(dlens);CHKERRQ(ierr);
2146   } else {
2147     PetscInt ml,nl;
2148 
2149     M = *newmat;
2150     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2151     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2152     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2153     /*
2154          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2155        rather than the slower MatSetValues().
2156     */
2157     M->was_assembled = PETSC_TRUE;
2158     M->assembled     = PETSC_FALSE;
2159   }
2160   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2161   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2162   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2163   ii  = aij->i;
2164   jj  = aij->j;
2165   aa  = aij->a;
2166   for (i=0; i<m; i++) {
2167     row   = rstart/bs + i;
2168     nz    = ii[i+1] - ii[i];
2169     cwork = jj;     jj += nz;
2170     vwork = aa;     aa += nz;
2171     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2172   }
2173 
2174   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2175   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2176   *newmat = M;
2177 
2178   /* save submatrix used in processor for next request */
2179   if (call ==  MAT_INITIAL_MATRIX) {
2180     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2181     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2182   }
2183 
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatPermute_MPIBAIJ"
2189 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2190 {
2191   MPI_Comm       comm,pcomm;
2192   PetscInt       first,local_size,nrows;
2193   const PetscInt *rows;
2194   PetscMPIInt    size;
2195   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2196   PetscErrorCode ierr;
2197 
2198   PetscFunctionBegin;
2199   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2200   /* make a collective version of 'rowp' */
2201   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2202   if (pcomm==comm) {
2203     crowp = rowp;
2204   } else {
2205     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2206     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2207     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2208     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2209   }
2210   /* collect the global row permutation and invert it */
2211   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2212   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2213   if (pcomm!=comm) {
2214     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2215   }
2216   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2217   /* get the local target indices */
2218   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2219   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2220   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2221   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2222   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2223   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2224   /* the column permutation is so much easier;
2225      make a local version of 'colp' and invert it */
2226   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2227   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2228   if (size==1) {
2229     lcolp = colp;
2230   } else {
2231     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2232     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2233     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2234   }
2235   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2236   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2237   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2238   if (size>1) {
2239     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2240     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2241   }
2242   /* now we just get the submatrix */
2243   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2244   /* clean up */
2245   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2246   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2252 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2253 {
2254   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2255   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2256 
2257   PetscFunctionBegin;
2258   if (nghosts) { *nghosts = B->nbs;}
2259   if (ghosts) {*ghosts = baij->garray;}
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2264 
2265 #undef __FUNCT__
2266 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2267 /*
2268     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2269 */
2270 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2271 {
2272   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2273   PetscErrorCode        ierr;
2274   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2275   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2276   const PetscInt        *is;
2277   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2278   PetscInt              *rowhit,M,cstart,cend,colb;
2279   PetscInt              *columnsforrow,l;
2280   IS                    *isa;
2281   PetscBool              done,flg;
2282   ISLocalToGlobalMapping map = mat->cbmapping;
2283   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2284 
2285   PetscFunctionBegin;
2286   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2287   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2288 
2289   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2290   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2291   M                = mat->rmap->n/bs;
2292   cstart           = mat->cmap->rstart/bs;
2293   cend             = mat->cmap->rend/bs;
2294   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2295   c->N             = mat->cmap->N/bs;
2296   c->m             = mat->rmap->n/bs;
2297   c->rstart        = mat->rmap->rstart/bs;
2298 
2299   c->ncolors       = nis;
2300   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2301   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2302   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2303   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2305   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2306 
2307   /* Allow access to data structures of local part of matrix */
2308   if (!baij->colmap) {
2309     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2310   }
2311   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2312   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2313 
2314   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2315   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2316 
2317   for (i=0; i<nis; i++) {
2318     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2319     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2320     c->ncolumns[i] = n;
2321     if (n) {
2322       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2323       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2324       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2325     } else {
2326       c->columns[i]  = 0;
2327     }
2328 
2329     if (ctype == IS_COLORING_GLOBAL){
2330       /* Determine the total (parallel) number of columns of this color */
2331       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2332       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2333 
2334       nn   = PetscMPIIntCast(n);
2335       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2336       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2337       if (!nctot) {
2338         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2339       }
2340 
2341       disp[0] = 0;
2342       for (j=1; j<size; j++) {
2343         disp[j] = disp[j-1] + ncolsonproc[j-1];
2344       }
2345 
2346       /* Get complete list of columns for color on each processor */
2347       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2348       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2349       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2350     } else if (ctype == IS_COLORING_GHOSTED){
2351       /* Determine local number of columns of this color on this process, including ghost points */
2352       nctot = n;
2353       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2354       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2355     } else {
2356       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2357     }
2358 
2359     /*
2360        Mark all rows affect by these columns
2361     */
2362     /* Temporary option to allow for debugging/testing */
2363     flg  = PETSC_FALSE;
2364     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2365     if (!flg) {/*-----------------------------------------------------------------------------*/
2366       /* crude, fast version */
2367       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2368       /* loop over columns*/
2369       for (j=0; j<nctot; j++) {
2370         if (ctype == IS_COLORING_GHOSTED) {
2371           col = ltog[cols[j]];
2372         } else {
2373           col  = cols[j];
2374         }
2375         if (col >= cstart && col < cend) {
2376           /* column is in diagonal block of matrix */
2377           rows = A_cj + A_ci[col-cstart];
2378           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2379         } else {
2380 #if defined (PETSC_USE_CTABLE)
2381           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2382 	  colb --;
2383 #else
2384           colb = baij->colmap[col] - 1;
2385 #endif
2386           if (colb == -1) {
2387             m = 0;
2388           } else {
2389             colb = colb/bs;
2390             rows = B_cj + B_ci[colb];
2391             m    = B_ci[colb+1] - B_ci[colb];
2392           }
2393         }
2394         /* loop over columns marking them in rowhit */
2395         for (k=0; k<m; k++) {
2396           rowhit[*rows++] = col + 1;
2397         }
2398       }
2399 
2400       /* count the number of hits */
2401       nrows = 0;
2402       for (j=0; j<M; j++) {
2403         if (rowhit[j]) nrows++;
2404       }
2405       c->nrows[i]         = nrows;
2406       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2407       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2408       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2409       nrows = 0;
2410       for (j=0; j<M; j++) {
2411         if (rowhit[j]) {
2412           c->rows[i][nrows]           = j;
2413           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2414           nrows++;
2415         }
2416       }
2417     } else {/*-------------------------------------------------------------------------------*/
2418       /* slow version, using rowhit as a linked list */
2419       PetscInt currentcol,fm,mfm;
2420       rowhit[M] = M;
2421       nrows     = 0;
2422       /* loop over columns*/
2423       for (j=0; j<nctot; j++) {
2424         if (ctype == IS_COLORING_GHOSTED) {
2425           col = ltog[cols[j]];
2426         } else {
2427           col  = cols[j];
2428         }
2429         if (col >= cstart && col < cend) {
2430           /* column is in diagonal block of matrix */
2431           rows = A_cj + A_ci[col-cstart];
2432           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2433         } else {
2434 #if defined (PETSC_USE_CTABLE)
2435           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2436           colb --;
2437 #else
2438           colb = baij->colmap[col] - 1;
2439 #endif
2440           if (colb == -1) {
2441             m = 0;
2442           } else {
2443             colb = colb/bs;
2444             rows = B_cj + B_ci[colb];
2445             m    = B_ci[colb+1] - B_ci[colb];
2446           }
2447         }
2448 
2449         /* loop over columns marking them in rowhit */
2450         fm    = M; /* fm points to first entry in linked list */
2451         for (k=0; k<m; k++) {
2452           currentcol = *rows++;
2453 	  /* is it already in the list? */
2454           do {
2455             mfm  = fm;
2456             fm   = rowhit[fm];
2457           } while (fm < currentcol);
2458           /* not in list so add it */
2459           if (fm != currentcol) {
2460             nrows++;
2461             columnsforrow[currentcol] = col;
2462             /* next three lines insert new entry into linked list */
2463             rowhit[mfm]               = currentcol;
2464             rowhit[currentcol]        = fm;
2465             fm                        = currentcol;
2466             /* fm points to present position in list since we know the columns are sorted */
2467           } else {
2468             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2469           }
2470         }
2471       }
2472       c->nrows[i]         = nrows;
2473       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2474       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2475       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2476       /* now store the linked list of rows into c->rows[i] */
2477       nrows = 0;
2478       fm    = rowhit[M];
2479       do {
2480         c->rows[i][nrows]            = fm;
2481         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2482         fm                           = rowhit[fm];
2483       } while (fm < M);
2484     } /* ---------------------------------------------------------------------------------------*/
2485     ierr = PetscFree(cols);CHKERRQ(ierr);
2486   }
2487 
2488   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2489   /*
2490        vscale will contain the "diagonal" on processor scalings followed by the off processor
2491   */
2492   if (ctype == IS_COLORING_GLOBAL) {
2493     PetscInt *garray;
2494     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2495     for (i=0; i<baij->B->cmap->n/bs; i++) {
2496       for (j=0; j<bs; j++) {
2497         garray[i*bs+j] = bs*baij->garray[i]+j;
2498       }
2499     }
2500     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2501     ierr = PetscFree(garray);CHKERRQ(ierr);
2502     CHKMEMQ;
2503     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2504     for (k=0; k<c->ncolors; k++) {
2505       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2506       for (l=0; l<c->nrows[k]; l++) {
2507         col = c->columnsforrow[k][l];
2508         if (col >= cstart && col < cend) {
2509           /* column is in diagonal block of matrix */
2510           colb = col - cstart;
2511         } else {
2512           /* column  is in "off-processor" part */
2513 #if defined (PETSC_USE_CTABLE)
2514           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2515           colb --;
2516 #else
2517           colb = baij->colmap[col] - 1;
2518 #endif
2519           colb = colb/bs;
2520           colb += cend - cstart;
2521         }
2522         c->vscaleforrow[k][l] = colb;
2523       }
2524     }
2525   } else if (ctype == IS_COLORING_GHOSTED) {
2526     /* Get gtol mapping */
2527     PetscInt N = mat->cmap->N, *gtol;
2528     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2529     for (i=0; i<N; i++) gtol[i] = -1;
2530     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2531 
2532     c->vscale = 0; /* will be created in MatFDColoringApply() */
2533     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2534     for (k=0; k<c->ncolors; k++) {
2535       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2536       for (l=0; l<c->nrows[k]; l++) {
2537         col = c->columnsforrow[k][l];      /* global column index */
2538         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2539       }
2540     }
2541     ierr = PetscFree(gtol);CHKERRQ(ierr);
2542   }
2543   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2544 
2545   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2546   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2547   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2548   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2549     CHKMEMQ;
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2555 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2556 {
2557   Mat            B;
2558   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2559   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2560   Mat_SeqAIJ     *b;
2561   PetscErrorCode ierr;
2562   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2563   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2564   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2565 
2566   PetscFunctionBegin;
2567   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2568   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2569 
2570   /* ----------------------------------------------------------------
2571      Tell every processor the number of nonzeros per row
2572   */
2573   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2574   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2575     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2576   }
2577   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2578   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2579   displs     = recvcounts + size;
2580   for (i=0; i<size; i++) {
2581     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2582     displs[i]     = A->rmap->range[i]/bs;
2583   }
2584 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2585   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2586 #else
2587   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2588 #endif
2589   /* ---------------------------------------------------------------
2590      Create the sequential matrix of the same type as the local block diagonal
2591   */
2592   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2593   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2594   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2595   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2596   b = (Mat_SeqAIJ *)B->data;
2597 
2598   /*--------------------------------------------------------------------
2599     Copy my part of matrix column indices over
2600   */
2601   sendcount  = ad->nz + bd->nz;
2602   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2603   a_jsendbuf = ad->j;
2604   b_jsendbuf = bd->j;
2605   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2606   cnt        = 0;
2607   for (i=0; i<n; i++) {
2608 
2609     /* put in lower diagonal portion */
2610     m = bd->i[i+1] - bd->i[i];
2611     while (m > 0) {
2612       /* is it above diagonal (in bd (compressed) numbering) */
2613       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2614       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2615       m--;
2616     }
2617 
2618     /* put in diagonal portion */
2619     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2620       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2621     }
2622 
2623     /* put in upper diagonal portion */
2624     while (m-- > 0) {
2625       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2626     }
2627   }
2628   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2629 
2630   /*--------------------------------------------------------------------
2631     Gather all column indices to all processors
2632   */
2633   for (i=0; i<size; i++) {
2634     recvcounts[i] = 0;
2635     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2636       recvcounts[i] += lens[j];
2637     }
2638   }
2639   displs[0]  = 0;
2640   for (i=1; i<size; i++) {
2641     displs[i] = displs[i-1] + recvcounts[i-1];
2642   }
2643 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2644   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2645 #else
2646   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2647 #endif
2648   /*--------------------------------------------------------------------
2649     Assemble the matrix into useable form (note numerical values not yet set)
2650   */
2651   /* set the b->ilen (length of each row) values */
2652   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2653   /* set the b->i indices */
2654   b->i[0] = 0;
2655   for (i=1; i<=A->rmap->N/bs; i++) {
2656     b->i[i] = b->i[i-1] + lens[i-1];
2657   }
2658   ierr = PetscFree(lens);CHKERRQ(ierr);
2659   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2660   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2661   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2662 
2663   if (A->symmetric){
2664     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2665   } else if (A->hermitian) {
2666     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2667   } else if (A->structurally_symmetric) {
2668     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2669   }
2670   *newmat = B;
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatSOR_MPIBAIJ"
2676 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2677 {
2678   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2679   PetscErrorCode ierr;
2680   Vec            bb1 = 0;
2681 
2682   PetscFunctionBegin;
2683   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2684     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2685   }
2686 
2687   if (flag == SOR_APPLY_UPPER) {
2688     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2689     PetscFunctionReturn(0);
2690   }
2691 
2692   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2693     if (flag & SOR_ZERO_INITIAL_GUESS) {
2694       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2695       its--;
2696     }
2697 
2698     while (its--) {
2699       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2700       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2701 
2702       /* update rhs: bb1 = bb - B*x */
2703       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2704       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2705 
2706       /* local sweep */
2707       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2708     }
2709   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2710     if (flag & SOR_ZERO_INITIAL_GUESS) {
2711       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2712       its--;
2713     }
2714     while (its--) {
2715       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2716       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717 
2718       /* update rhs: bb1 = bb - B*x */
2719       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2720       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2721 
2722       /* local sweep */
2723       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2724     }
2725   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2726     if (flag & SOR_ZERO_INITIAL_GUESS) {
2727       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2728       its--;
2729     }
2730     while (its--) {
2731       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2732       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2733 
2734       /* update rhs: bb1 = bb - B*x */
2735       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2736       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2737 
2738       /* local sweep */
2739       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2740     }
2741   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2742 
2743   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2744   PetscFunctionReturn(0);
2745 }
2746 
2747 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2748 
2749 
2750 /* -------------------------------------------------------------------*/
2751 static struct _MatOps MatOps_Values = {
2752        MatSetValues_MPIBAIJ,
2753        MatGetRow_MPIBAIJ,
2754        MatRestoreRow_MPIBAIJ,
2755        MatMult_MPIBAIJ,
2756 /* 4*/ MatMultAdd_MPIBAIJ,
2757        MatMultTranspose_MPIBAIJ,
2758        MatMultTransposeAdd_MPIBAIJ,
2759        0,
2760        0,
2761        0,
2762 /*10*/ 0,
2763        0,
2764        0,
2765        MatSOR_MPIBAIJ,
2766        MatTranspose_MPIBAIJ,
2767 /*15*/ MatGetInfo_MPIBAIJ,
2768        MatEqual_MPIBAIJ,
2769        MatGetDiagonal_MPIBAIJ,
2770        MatDiagonalScale_MPIBAIJ,
2771        MatNorm_MPIBAIJ,
2772 /*20*/ MatAssemblyBegin_MPIBAIJ,
2773        MatAssemblyEnd_MPIBAIJ,
2774        MatSetOption_MPIBAIJ,
2775        MatZeroEntries_MPIBAIJ,
2776 /*24*/ MatZeroRows_MPIBAIJ,
2777        0,
2778        0,
2779        0,
2780        0,
2781 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2782        0,
2783        0,
2784        0,
2785        0,
2786 /*34*/ MatDuplicate_MPIBAIJ,
2787        0,
2788        0,
2789        0,
2790        0,
2791 /*39*/ MatAXPY_MPIBAIJ,
2792        MatGetSubMatrices_MPIBAIJ,
2793        MatIncreaseOverlap_MPIBAIJ,
2794        MatGetValues_MPIBAIJ,
2795        MatCopy_MPIBAIJ,
2796 /*44*/ 0,
2797        MatScale_MPIBAIJ,
2798        0,
2799        0,
2800        0,
2801 /*49*/ MatSetBlockSize_MPIBAIJ,
2802        0,
2803        0,
2804        0,
2805        0,
2806 /*54*/ MatFDColoringCreate_MPIBAIJ,
2807        0,
2808        MatSetUnfactored_MPIBAIJ,
2809        MatPermute_MPIBAIJ,
2810        MatSetValuesBlocked_MPIBAIJ,
2811 /*59*/ MatGetSubMatrix_MPIBAIJ,
2812        MatDestroy_MPIBAIJ,
2813        MatView_MPIBAIJ,
2814        0,
2815        0,
2816 /*64*/ 0,
2817        0,
2818        0,
2819        0,
2820        0,
2821 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2822        0,
2823        0,
2824        0,
2825        0,
2826 /*74*/ 0,
2827        MatFDColoringApply_BAIJ,
2828        0,
2829        0,
2830        0,
2831 /*79*/ 0,
2832        0,
2833        0,
2834        0,
2835        MatLoad_MPIBAIJ,
2836 /*84*/ 0,
2837        0,
2838        0,
2839        0,
2840        0,
2841 /*89*/ 0,
2842        0,
2843        0,
2844        0,
2845        0,
2846 /*94*/ 0,
2847        0,
2848        0,
2849        0,
2850        0,
2851 /*99*/ 0,
2852        0,
2853        0,
2854        0,
2855        0,
2856 /*104*/0,
2857        MatRealPart_MPIBAIJ,
2858        MatImaginaryPart_MPIBAIJ,
2859        0,
2860        0,
2861 /*109*/0,
2862        0,
2863        0,
2864        0,
2865        0,
2866 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2867        0,
2868        MatGetGhosts_MPIBAIJ,
2869        0,
2870        0,
2871 /*119*/0,
2872        0,
2873        0,
2874        0
2875 };
2876 
2877 EXTERN_C_BEGIN
2878 #undef __FUNCT__
2879 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2880 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
2881 {
2882   PetscFunctionBegin;
2883   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2884   *iscopy = PETSC_FALSE;
2885   PetscFunctionReturn(0);
2886 }
2887 EXTERN_C_END
2888 
2889 EXTERN_C_BEGIN
2890 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2891 EXTERN_C_END
2892 
2893 EXTERN_C_BEGIN
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2896 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2897 {
2898   PetscInt       m,rstart,cstart,cend;
2899   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2900   const PetscInt *JJ=0;
2901   PetscScalar    *values=0;
2902   PetscErrorCode ierr;
2903 
2904   PetscFunctionBegin;
2905 
2906   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2907   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2908   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2909   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2910   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2911   m      = B->rmap->n/bs;
2912   rstart = B->rmap->rstart/bs;
2913   cstart = B->cmap->rstart/bs;
2914   cend   = B->cmap->rend/bs;
2915 
2916   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2917   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2918   for (i=0; i<m; i++) {
2919     nz = ii[i+1] - ii[i];
2920     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2921     nz_max = PetscMax(nz_max,nz);
2922     JJ  = jj + ii[i];
2923     for (j=0; j<nz; j++) {
2924       if (*JJ >= cstart) break;
2925       JJ++;
2926     }
2927     d = 0;
2928     for (; j<nz; j++) {
2929       if (*JJ++ >= cend) break;
2930       d++;
2931     }
2932     d_nnz[i] = d;
2933     o_nnz[i] = nz - d;
2934   }
2935   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2936   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2937 
2938   values = (PetscScalar*)V;
2939   if (!values) {
2940     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2941     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2942   }
2943   for (i=0; i<m; i++) {
2944     PetscInt          row    = i + rstart;
2945     PetscInt          ncols  = ii[i+1] - ii[i];
2946     const PetscInt    *icols = jj + ii[i];
2947     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2948     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2949   }
2950 
2951   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2952   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954 
2955   PetscFunctionReturn(0);
2956 }
2957 EXTERN_C_END
2958 
2959 #undef __FUNCT__
2960 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2961 /*@C
2962    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2963    (the default parallel PETSc format).
2964 
2965    Collective on MPI_Comm
2966 
2967    Input Parameters:
2968 +  A - the matrix
2969 .  bs - the block size
2970 .  i - the indices into j for the start of each local row (starts with zero)
2971 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2972 -  v - optional values in the matrix
2973 
2974    Level: developer
2975 
2976 .keywords: matrix, aij, compressed row, sparse, parallel
2977 
2978 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2979 @*/
2980 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2981 {
2982   PetscErrorCode ierr;
2983 
2984   PetscFunctionBegin;
2985   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 EXTERN_C_BEGIN
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2992 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2993 {
2994   Mat_MPIBAIJ    *b;
2995   PetscErrorCode ierr;
2996   PetscInt       i, newbs = PetscAbs(bs);
2997 
2998   PetscFunctionBegin;
2999   if (bs < 0) {
3000     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3001       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3002     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3003     bs   = PetscAbs(bs);
3004   }
3005   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3006   bs = newbs;
3007 
3008 
3009   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3010   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3011   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3012   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3013   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3014 
3015   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3016   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3017   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3018   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3019 
3020   if (d_nnz) {
3021     for (i=0; i<B->rmap->n/bs; i++) {
3022       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3023     }
3024   }
3025   if (o_nnz) {
3026     for (i=0; i<B->rmap->n/bs; i++) {
3027       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3028     }
3029   }
3030 
3031   b = (Mat_MPIBAIJ*)B->data;
3032   b->bs2 = bs*bs;
3033   b->mbs = B->rmap->n/bs;
3034   b->nbs = B->cmap->n/bs;
3035   b->Mbs = B->rmap->N/bs;
3036   b->Nbs = B->cmap->N/bs;
3037 
3038   for (i=0; i<=b->size; i++) {
3039     b->rangebs[i] = B->rmap->range[i]/bs;
3040   }
3041   b->rstartbs = B->rmap->rstart/bs;
3042   b->rendbs   = B->rmap->rend/bs;
3043   b->cstartbs = B->cmap->rstart/bs;
3044   b->cendbs   = B->cmap->rend/bs;
3045 
3046   if (!B->preallocated) {
3047     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3048     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3049     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3050     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3051     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3052     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3053     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3054     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3055     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3056   }
3057 
3058   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3059   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3060   B->preallocated = PETSC_TRUE;
3061   PetscFunctionReturn(0);
3062 }
3063 EXTERN_C_END
3064 
3065 EXTERN_C_BEGIN
3066 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3067 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3068 EXTERN_C_END
3069 
3070 
3071 EXTERN_C_BEGIN
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3074 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3075 {
3076   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3077   PetscErrorCode ierr;
3078   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3079   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3080   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3081 
3082   PetscFunctionBegin;
3083   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3084   ii[0] = 0;
3085   CHKMEMQ;
3086   for (i=0; i<M; i++) {
3087     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3088     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3089     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3090     /* remove one from count of matrix has diagonal */
3091     for (j=id[i]; j<id[i+1]; j++) {
3092       if (jd[j] == i) {ii[i+1]--;break;}
3093     }
3094   CHKMEMQ;
3095   }
3096   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3097   cnt = 0;
3098   for (i=0; i<M; i++) {
3099     for (j=io[i]; j<io[i+1]; j++) {
3100       if (garray[jo[j]] > rstart) break;
3101       jj[cnt++] = garray[jo[j]];
3102   CHKMEMQ;
3103     }
3104     for (k=id[i]; k<id[i+1]; k++) {
3105       if (jd[k] != i) {
3106         jj[cnt++] = rstart + jd[k];
3107   CHKMEMQ;
3108       }
3109     }
3110     for (;j<io[i+1]; j++) {
3111       jj[cnt++] = garray[jo[j]];
3112   CHKMEMQ;
3113     }
3114   }
3115   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3116   PetscFunctionReturn(0);
3117 }
3118 EXTERN_C_END
3119 
3120 #include "../src/mat/impls/aij/mpi/mpiaij.h"
3121 EXTERN_C_BEGIN
3122 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3123 EXTERN_C_END
3124 
3125 EXTERN_C_BEGIN
3126 #undef __FUNCT__
3127 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3128 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3129 {
3130   PetscErrorCode ierr;
3131   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3132   Mat            B;
3133   Mat_MPIAIJ     *b;
3134 
3135   PetscFunctionBegin;
3136   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3137 
3138   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3139   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3140   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3141   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3142   b = (Mat_MPIAIJ*) B->data;
3143 
3144   ierr = MatDestroy(b->A);CHKERRQ(ierr);
3145   ierr = MatDestroy(b->B);CHKERRQ(ierr);
3146   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3147   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3148   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3149   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3150   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3151   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3152   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153   if (reuse == MAT_REUSE_MATRIX) {
3154     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3155   } else {
3156    *newmat = B;
3157   }
3158   PetscFunctionReturn(0);
3159 }
3160 EXTERN_C_END
3161 
3162 EXTERN_C_BEGIN
3163 #if defined(PETSC_HAVE_MUMPS)
3164 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3165 #endif
3166 EXTERN_C_END
3167 
3168 /*MC
3169    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3170 
3171    Options Database Keys:
3172 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3173 . -mat_block_size <bs> - set the blocksize used to store the matrix
3174 - -mat_use_hash_table <fact>
3175 
3176   Level: beginner
3177 
3178 .seealso: MatCreateMPIBAIJ
3179 M*/
3180 
3181 EXTERN_C_BEGIN
3182 #undef __FUNCT__
3183 #define __FUNCT__ "MatCreate_MPIBAIJ"
3184 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3185 {
3186   Mat_MPIBAIJ    *b;
3187   PetscErrorCode ierr;
3188   PetscBool      flg;
3189 
3190   PetscFunctionBegin;
3191   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3192   B->data = (void*)b;
3193 
3194 
3195   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3196   B->assembled  = PETSC_FALSE;
3197 
3198   B->insertmode = NOT_SET_VALUES;
3199   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3200   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3201 
3202   /* build local table of row and column ownerships */
3203   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3204 
3205   /* build cache for off array entries formed */
3206   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3207   b->donotstash  = PETSC_FALSE;
3208   b->colmap      = PETSC_NULL;
3209   b->garray      = PETSC_NULL;
3210   b->roworiented = PETSC_TRUE;
3211 
3212   /* stuff used in block assembly */
3213   b->barray       = 0;
3214 
3215   /* stuff used for matrix vector multiply */
3216   b->lvec         = 0;
3217   b->Mvctx        = 0;
3218 
3219   /* stuff for MatGetRow() */
3220   b->rowindices   = 0;
3221   b->rowvalues    = 0;
3222   b->getrowactive = PETSC_FALSE;
3223 
3224   /* hash table stuff */
3225   b->ht           = 0;
3226   b->hd           = 0;
3227   b->ht_size      = 0;
3228   b->ht_flag      = PETSC_FALSE;
3229   b->ht_fact      = 0;
3230   b->ht_total_ct  = 0;
3231   b->ht_insert_ct = 0;
3232 
3233   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3234     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3235     if (flg) {
3236       PetscReal fact = 1.39;
3237       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3238       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3239       if (fact <= 1.0) fact = 1.39;
3240       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3241       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3242     }
3243   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3244 
3245 #if defined(PETSC_HAVE_MUMPS)
3246   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3247 #endif
3248   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3249                                      "MatConvert_MPIBAIJ_MPIAdj",
3250                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3251   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3252                                      "MatConvert_MPIBAIJ_MPIAIJ",
3253                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3254   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3255                                      "MatStoreValues_MPIBAIJ",
3256                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3257   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3258                                      "MatRetrieveValues_MPIBAIJ",
3259                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3260   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3261                                      "MatGetDiagonalBlock_MPIBAIJ",
3262                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3263   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3264                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3265                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3266   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3267 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3268 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3269   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3270                                      "MatDiagonalScaleLocal_MPIBAIJ",
3271                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3272   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3273                                      "MatSetHashTableFactor_MPIBAIJ",
3274                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3275   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3276   PetscFunctionReturn(0);
3277 }
3278 EXTERN_C_END
3279 
3280 /*MC
3281    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3282 
3283    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3284    and MATMPIBAIJ otherwise.
3285 
3286    Options Database Keys:
3287 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3288 
3289   Level: beginner
3290 
3291 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3292 M*/
3293 
3294 EXTERN_C_BEGIN
3295 #undef __FUNCT__
3296 #define __FUNCT__ "MatCreate_BAIJ"
3297 PetscErrorCode  MatCreate_BAIJ(Mat A)
3298 {
3299   PetscErrorCode ierr;
3300   PetscMPIInt    size;
3301 
3302   PetscFunctionBegin;
3303   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3304   if (size == 1) {
3305     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3306   } else {
3307     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3308   }
3309   PetscFunctionReturn(0);
3310 }
3311 EXTERN_C_END
3312 
3313 #undef __FUNCT__
3314 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3315 /*@C
3316    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3317    (block compressed row).  For good matrix assembly performance
3318    the user should preallocate the matrix storage by setting the parameters
3319    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3320    performance can be increased by more than a factor of 50.
3321 
3322    Collective on Mat
3323 
3324    Input Parameters:
3325 +  A - the matrix
3326 .  bs   - size of blockk
3327 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3328            submatrix  (same for all local rows)
3329 .  d_nnz - array containing the number of block nonzeros in the various block rows
3330            of the in diagonal portion of the local (possibly different for each block
3331            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3332 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3333            submatrix (same for all local rows).
3334 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3335            off-diagonal portion of the local submatrix (possibly different for
3336            each block row) or PETSC_NULL.
3337 
3338    If the *_nnz parameter is given then the *_nz parameter is ignored
3339 
3340    Options Database Keys:
3341 +   -mat_block_size - size of the blocks to use
3342 -   -mat_use_hash_table <fact>
3343 
3344    Notes:
3345    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3346    than it must be used on all processors that share the object for that argument.
3347 
3348    Storage Information:
3349    For a square global matrix we define each processor's diagonal portion
3350    to be its local rows and the corresponding columns (a square submatrix);
3351    each processor's off-diagonal portion encompasses the remainder of the
3352    local matrix (a rectangular submatrix).
3353 
3354    The user can specify preallocated storage for the diagonal part of
3355    the local submatrix with either d_nz or d_nnz (not both).  Set
3356    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3357    memory allocation.  Likewise, specify preallocated storage for the
3358    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3359 
3360    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3361    the figure below we depict these three local rows and all columns (0-11).
3362 
3363 .vb
3364            0 1 2 3 4 5 6 7 8 9 10 11
3365           -------------------
3366    row 3  |  o o o d d d o o o o o o
3367    row 4  |  o o o d d d o o o o o o
3368    row 5  |  o o o d d d o o o o o o
3369           -------------------
3370 .ve
3371 
3372    Thus, any entries in the d locations are stored in the d (diagonal)
3373    submatrix, and any entries in the o locations are stored in the
3374    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3375    stored simply in the MATSEQBAIJ format for compressed row storage.
3376 
3377    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3378    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3379    In general, for PDE problems in which most nonzeros are near the diagonal,
3380    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3381    or you will get TERRIBLE performance; see the users' manual chapter on
3382    matrices.
3383 
3384    You can call MatGetInfo() to get information on how effective the preallocation was;
3385    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3386    You can also run with the option -info and look for messages with the string
3387    malloc in them to see if additional memory allocation was needed.
3388 
3389    Level: intermediate
3390 
3391 .keywords: matrix, block, aij, compressed row, sparse, parallel
3392 
3393 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3394 @*/
3395 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3396 {
3397   PetscErrorCode ierr;
3398 
3399   PetscFunctionBegin;
3400   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3401   PetscFunctionReturn(0);
3402 }
3403 
3404 #undef __FUNCT__
3405 #define __FUNCT__ "MatCreateMPIBAIJ"
3406 /*@C
3407    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3408    (block compressed row).  For good matrix assembly performance
3409    the user should preallocate the matrix storage by setting the parameters
3410    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3411    performance can be increased by more than a factor of 50.
3412 
3413    Collective on MPI_Comm
3414 
3415    Input Parameters:
3416 +  comm - MPI communicator
3417 .  bs   - size of blockk
3418 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3419            This value should be the same as the local size used in creating the
3420            y vector for the matrix-vector product y = Ax.
3421 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3422            This value should be the same as the local size used in creating the
3423            x vector for the matrix-vector product y = Ax.
3424 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3425 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3426 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3427            submatrix  (same for all local rows)
3428 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3429            of the in diagonal portion of the local (possibly different for each block
3430            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3431 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3432            submatrix (same for all local rows).
3433 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3434            off-diagonal portion of the local submatrix (possibly different for
3435            each block row) or PETSC_NULL.
3436 
3437    Output Parameter:
3438 .  A - the matrix
3439 
3440    Options Database Keys:
3441 +   -mat_block_size - size of the blocks to use
3442 -   -mat_use_hash_table <fact>
3443 
3444    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3445    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3446    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3447 
3448    Notes:
3449    If the *_nnz parameter is given then the *_nz parameter is ignored
3450 
3451    A nonzero block is any block that as 1 or more nonzeros in it
3452 
3453    The user MUST specify either the local or global matrix dimensions
3454    (possibly both).
3455 
3456    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3457    than it must be used on all processors that share the object for that argument.
3458 
3459    Storage Information:
3460    For a square global matrix we define each processor's diagonal portion
3461    to be its local rows and the corresponding columns (a square submatrix);
3462    each processor's off-diagonal portion encompasses the remainder of the
3463    local matrix (a rectangular submatrix).
3464 
3465    The user can specify preallocated storage for the diagonal part of
3466    the local submatrix with either d_nz or d_nnz (not both).  Set
3467    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3468    memory allocation.  Likewise, specify preallocated storage for the
3469    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3470 
3471    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3472    the figure below we depict these three local rows and all columns (0-11).
3473 
3474 .vb
3475            0 1 2 3 4 5 6 7 8 9 10 11
3476           -------------------
3477    row 3  |  o o o d d d o o o o o o
3478    row 4  |  o o o d d d o o o o o o
3479    row 5  |  o o o d d d o o o o o o
3480           -------------------
3481 .ve
3482 
3483    Thus, any entries in the d locations are stored in the d (diagonal)
3484    submatrix, and any entries in the o locations are stored in the
3485    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3486    stored simply in the MATSEQBAIJ format for compressed row storage.
3487 
3488    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3489    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3490    In general, for PDE problems in which most nonzeros are near the diagonal,
3491    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3492    or you will get TERRIBLE performance; see the users' manual chapter on
3493    matrices.
3494 
3495    Level: intermediate
3496 
3497 .keywords: matrix, block, aij, compressed row, sparse, parallel
3498 
3499 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3500 @*/
3501 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3502 {
3503   PetscErrorCode ierr;
3504   PetscMPIInt    size;
3505 
3506   PetscFunctionBegin;
3507   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3508   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3509   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3510   if (size > 1) {
3511     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3512     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3513   } else {
3514     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3515     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3516   }
3517   PetscFunctionReturn(0);
3518 }
3519 
3520 #undef __FUNCT__
3521 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3522 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3523 {
3524   Mat            mat;
3525   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3526   PetscErrorCode ierr;
3527   PetscInt       len=0;
3528 
3529   PetscFunctionBegin;
3530   *newmat       = 0;
3531   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3532   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3533   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3534   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3535 
3536   mat->factortype   = matin->factortype;
3537   mat->preallocated = PETSC_TRUE;
3538   mat->assembled    = PETSC_TRUE;
3539   mat->insertmode   = NOT_SET_VALUES;
3540 
3541   a      = (Mat_MPIBAIJ*)mat->data;
3542   mat->rmap->bs  = matin->rmap->bs;
3543   a->bs2   = oldmat->bs2;
3544   a->mbs   = oldmat->mbs;
3545   a->nbs   = oldmat->nbs;
3546   a->Mbs   = oldmat->Mbs;
3547   a->Nbs   = oldmat->Nbs;
3548 
3549   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3550   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3551 
3552   a->size         = oldmat->size;
3553   a->rank         = oldmat->rank;
3554   a->donotstash   = oldmat->donotstash;
3555   a->roworiented  = oldmat->roworiented;
3556   a->rowindices   = 0;
3557   a->rowvalues    = 0;
3558   a->getrowactive = PETSC_FALSE;
3559   a->barray       = 0;
3560   a->rstartbs     = oldmat->rstartbs;
3561   a->rendbs       = oldmat->rendbs;
3562   a->cstartbs     = oldmat->cstartbs;
3563   a->cendbs       = oldmat->cendbs;
3564 
3565   /* hash table stuff */
3566   a->ht           = 0;
3567   a->hd           = 0;
3568   a->ht_size      = 0;
3569   a->ht_flag      = oldmat->ht_flag;
3570   a->ht_fact      = oldmat->ht_fact;
3571   a->ht_total_ct  = 0;
3572   a->ht_insert_ct = 0;
3573 
3574   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3575   if (oldmat->colmap) {
3576 #if defined (PETSC_USE_CTABLE)
3577   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3578 #else
3579   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3580   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3581   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3582 #endif
3583   } else a->colmap = 0;
3584 
3585   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3586     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3587     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3588     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3589   } else a->garray = 0;
3590 
3591   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3592   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3593   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3594   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3595   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3596 
3597   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3598   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3599   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3600   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3601   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3602   *newmat = mat;
3603 
3604   PetscFunctionReturn(0);
3605 }
3606 
3607 #undef __FUNCT__
3608 #define __FUNCT__ "MatLoad_MPIBAIJ"
3609 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3610 {
3611   PetscErrorCode ierr;
3612   int            fd;
3613   PetscInt       i,nz,j,rstart,rend;
3614   PetscScalar    *vals,*buf;
3615   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3616   MPI_Status     status;
3617   PetscMPIInt    rank,size,maxnz;
3618   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3619   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3620   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3621   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3622   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3623   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3624 
3625   PetscFunctionBegin;
3626   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3627     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3628   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3629 
3630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3631   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3632   if (!rank) {
3633     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3634     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3635     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3636   }
3637 
3638   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3639 
3640   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3641   M = header[1]; N = header[2];
3642 
3643   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3644   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3645   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3646 
3647   /* If global sizes are set, check if they are consistent with that given in the file */
3648   if (sizesset) {
3649     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3650   }
3651   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3652   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3653 
3654   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3655 
3656   /*
3657      This code adds extra rows to make sure the number of rows is
3658      divisible by the blocksize
3659   */
3660   Mbs        = M/bs;
3661   extra_rows = bs - M + bs*Mbs;
3662   if (extra_rows == bs) extra_rows = 0;
3663   else                  Mbs++;
3664   if (extra_rows && !rank) {
3665     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3666   }
3667 
3668   /* determine ownership of all rows */
3669   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3670     mbs        = Mbs/size + ((Mbs % size) > rank);
3671     m          = mbs*bs;
3672   } else { /* User set */
3673     m          = newmat->rmap->n;
3674     mbs        = m/bs;
3675   }
3676   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3677   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3678 
3679   /* process 0 needs enough room for process with most rows */
3680   if (!rank) {
3681     mmax = rowners[1];
3682     for (i=2; i<size; i++) {
3683       mmax = PetscMax(mmax,rowners[i]);
3684     }
3685     mmax*=bs;
3686   } else mmax = m;
3687 
3688   rowners[0] = 0;
3689   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3690   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3691   rstart = rowners[rank];
3692   rend   = rowners[rank+1];
3693 
3694   /* distribute row lengths to all processors */
3695   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3696   if (!rank) {
3697     mend = m;
3698     if (size == 1) mend = mend - extra_rows;
3699     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3700     for (j=mend; j<m; j++) locrowlens[j] = 1;
3701     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3702     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3703     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3704     for (j=0; j<m; j++) {
3705       procsnz[0] += locrowlens[j];
3706     }
3707     for (i=1; i<size; i++) {
3708       mend = browners[i+1] - browners[i];
3709       if (i == size-1) mend = mend - extra_rows;
3710       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3711       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3712       /* calculate the number of nonzeros on each processor */
3713       for (j=0; j<browners[i+1]-browners[i]; j++) {
3714         procsnz[i] += rowlengths[j];
3715       }
3716       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3717     }
3718     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3719   } else {
3720     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3721   }
3722 
3723   if (!rank) {
3724     /* determine max buffer needed and allocate it */
3725     maxnz = procsnz[0];
3726     for (i=1; i<size; i++) {
3727       maxnz = PetscMax(maxnz,procsnz[i]);
3728     }
3729     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3730 
3731     /* read in my part of the matrix column indices  */
3732     nz     = procsnz[0];
3733     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3734     mycols = ibuf;
3735     if (size == 1)  nz -= extra_rows;
3736     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3737     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3738 
3739     /* read in every ones (except the last) and ship off */
3740     for (i=1; i<size-1; i++) {
3741       nz   = procsnz[i];
3742       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3743       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3744     }
3745     /* read in the stuff for the last proc */
3746     if (size != 1) {
3747       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3748       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3749       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3750       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3751     }
3752     ierr = PetscFree(cols);CHKERRQ(ierr);
3753   } else {
3754     /* determine buffer space needed for message */
3755     nz = 0;
3756     for (i=0; i<m; i++) {
3757       nz += locrowlens[i];
3758     }
3759     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3760     mycols = ibuf;
3761     /* receive message of column indices*/
3762     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3763     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3764     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3765   }
3766 
3767   /* loop over local rows, determining number of off diagonal entries */
3768   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3769   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3770   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3771   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3772   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3773   rowcount = 0; nzcount = 0;
3774   for (i=0; i<mbs; i++) {
3775     dcount  = 0;
3776     odcount = 0;
3777     for (j=0; j<bs; j++) {
3778       kmax = locrowlens[rowcount];
3779       for (k=0; k<kmax; k++) {
3780         tmp = mycols[nzcount++]/bs;
3781         if (!mask[tmp]) {
3782           mask[tmp] = 1;
3783           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3784           else masked1[dcount++] = tmp;
3785         }
3786       }
3787       rowcount++;
3788     }
3789 
3790     dlens[i]  = dcount;
3791     odlens[i] = odcount;
3792 
3793     /* zero out the mask elements we set */
3794     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3795     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3796   }
3797 
3798 
3799   if (!sizesset) {
3800     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3801   }
3802   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3803 
3804   if (!rank) {
3805     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3806     /* read in my part of the matrix numerical values  */
3807     nz = procsnz[0];
3808     vals = buf;
3809     mycols = ibuf;
3810     if (size == 1)  nz -= extra_rows;
3811     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3812     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3813 
3814     /* insert into matrix */
3815     jj      = rstart*bs;
3816     for (i=0; i<m; i++) {
3817       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3818       mycols += locrowlens[i];
3819       vals   += locrowlens[i];
3820       jj++;
3821     }
3822     /* read in other processors (except the last one) and ship out */
3823     for (i=1; i<size-1; i++) {
3824       nz   = procsnz[i];
3825       vals = buf;
3826       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3827       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3828     }
3829     /* the last proc */
3830     if (size != 1){
3831       nz   = procsnz[i] - extra_rows;
3832       vals = buf;
3833       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3834       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3835       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3836     }
3837     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3838   } else {
3839     /* receive numeric values */
3840     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3841 
3842     /* receive message of values*/
3843     vals   = buf;
3844     mycols = ibuf;
3845     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3846     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3847     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3848 
3849     /* insert into matrix */
3850     jj      = rstart*bs;
3851     for (i=0; i<m; i++) {
3852       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3853       mycols += locrowlens[i];
3854       vals   += locrowlens[i];
3855       jj++;
3856     }
3857   }
3858   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3859   ierr = PetscFree(buf);CHKERRQ(ierr);
3860   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3861   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3862   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3863   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3864   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3866 
3867   PetscFunctionReturn(0);
3868 }
3869 
3870 #undef __FUNCT__
3871 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3872 /*@
3873    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3874 
3875    Input Parameters:
3876 .  mat  - the matrix
3877 .  fact - factor
3878 
3879    Not Collective, each process can use a different factor
3880 
3881    Level: advanced
3882 
3883   Notes:
3884    This can also be set by the command line option: -mat_use_hash_table <fact>
3885 
3886 .keywords: matrix, hashtable, factor, HT
3887 
3888 .seealso: MatSetOption()
3889 @*/
3890 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3891 {
3892   PetscErrorCode ierr;
3893 
3894   PetscFunctionBegin;
3895   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 EXTERN_C_BEGIN
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3902 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3903 {
3904   Mat_MPIBAIJ *baij;
3905 
3906   PetscFunctionBegin;
3907   baij = (Mat_MPIBAIJ*)mat->data;
3908   baij->ht_fact = fact;
3909   PetscFunctionReturn(0);
3910 }
3911 EXTERN_C_END
3912 
3913 #undef __FUNCT__
3914 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3915 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3916 {
3917   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3918   PetscFunctionBegin;
3919   *Ad     = a->A;
3920   *Ao     = a->B;
3921   *colmap = a->garray;
3922   PetscFunctionReturn(0);
3923 }
3924 
3925 /*
3926     Special version for direct calls from Fortran (to eliminate two function call overheads
3927 */
3928 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3929 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3930 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3931 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3932 #endif
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3936 /*@C
3937   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3938 
3939   Collective on Mat
3940 
3941   Input Parameters:
3942 + mat - the matrix
3943 . min - number of input rows
3944 . im - input rows
3945 . nin - number of input columns
3946 . in - input columns
3947 . v - numerical values input
3948 - addvin - INSERT_VALUES or ADD_VALUES
3949 
3950   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3951 
3952   Level: advanced
3953 
3954 .seealso:   MatSetValuesBlocked()
3955 @*/
3956 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3957 {
3958   /* convert input arguments to C version */
3959   Mat             mat = *matin;
3960   PetscInt        m = *min, n = *nin;
3961   InsertMode      addv = *addvin;
3962 
3963   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3964   const MatScalar *value;
3965   MatScalar       *barray=baij->barray;
3966   PetscBool       roworiented = baij->roworiented;
3967   PetscErrorCode  ierr;
3968   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3969   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3970   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3971 
3972   PetscFunctionBegin;
3973   /* tasks normally handled by MatSetValuesBlocked() */
3974   if (mat->insertmode == NOT_SET_VALUES) {
3975     mat->insertmode = addv;
3976   }
3977 #if defined(PETSC_USE_DEBUG)
3978   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3979   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3980 #endif
3981   if (mat->assembled) {
3982     mat->was_assembled = PETSC_TRUE;
3983     mat->assembled     = PETSC_FALSE;
3984   }
3985   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3986 
3987 
3988   if(!barray) {
3989     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3990     baij->barray = barray;
3991   }
3992 
3993   if (roworiented) {
3994     stepval = (n-1)*bs;
3995   } else {
3996     stepval = (m-1)*bs;
3997   }
3998   for (i=0; i<m; i++) {
3999     if (im[i] < 0) continue;
4000 #if defined(PETSC_USE_DEBUG)
4001     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4002 #endif
4003     if (im[i] >= rstart && im[i] < rend) {
4004       row = im[i] - rstart;
4005       for (j=0; j<n; j++) {
4006         /* If NumCol = 1 then a copy is not required */
4007         if ((roworiented) && (n == 1)) {
4008           barray = (MatScalar*)v + i*bs2;
4009         } else if((!roworiented) && (m == 1)) {
4010           barray = (MatScalar*)v + j*bs2;
4011         } else { /* Here a copy is required */
4012           if (roworiented) {
4013             value = v + i*(stepval+bs)*bs + j*bs;
4014           } else {
4015             value = v + j*(stepval+bs)*bs + i*bs;
4016           }
4017           for (ii=0; ii<bs; ii++,value+=stepval) {
4018             for (jj=0; jj<bs; jj++) {
4019               *barray++  = *value++;
4020             }
4021           }
4022           barray -=bs2;
4023         }
4024 
4025         if (in[j] >= cstart && in[j] < cend){
4026           col  = in[j] - cstart;
4027           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4028         }
4029         else if (in[j] < 0) continue;
4030 #if defined(PETSC_USE_DEBUG)
4031         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4032 #endif
4033         else {
4034           if (mat->was_assembled) {
4035             if (!baij->colmap) {
4036               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4037             }
4038 
4039 #if defined(PETSC_USE_DEBUG)
4040 #if defined (PETSC_USE_CTABLE)
4041             { PetscInt data;
4042               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4043               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4044             }
4045 #else
4046             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4047 #endif
4048 #endif
4049 #if defined (PETSC_USE_CTABLE)
4050 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4051             col  = (col - 1)/bs;
4052 #else
4053             col = (baij->colmap[in[j]] - 1)/bs;
4054 #endif
4055             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4056               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4057               col =  in[j];
4058             }
4059           }
4060           else col = in[j];
4061           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4062         }
4063       }
4064     } else {
4065       if (!baij->donotstash) {
4066         if (roworiented) {
4067           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4068         } else {
4069           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4070         }
4071       }
4072     }
4073   }
4074 
4075   /* task normally handled by MatSetValuesBlocked() */
4076   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
4079 
4080 #undef __FUNCT__
4081 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4082 /*@
4083      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4084          CSR format the local rows.
4085 
4086    Collective on MPI_Comm
4087 
4088    Input Parameters:
4089 +  comm - MPI communicator
4090 .  bs - the block size, only a block size of 1 is supported
4091 .  m - number of local rows (Cannot be PETSC_DECIDE)
4092 .  n - This value should be the same as the local size used in creating the
4093        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4094        calculated if N is given) For square matrices n is almost always m.
4095 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4096 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4097 .   i - row indices
4098 .   j - column indices
4099 -   a - matrix values
4100 
4101    Output Parameter:
4102 .   mat - the matrix
4103 
4104    Level: intermediate
4105 
4106    Notes:
4107        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4108      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4109      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4110 
4111        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4112 
4113 .keywords: matrix, aij, compressed row, sparse, parallel
4114 
4115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4116           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4117 @*/
4118 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4119 {
4120   PetscErrorCode ierr;
4121 
4122 
4123  PetscFunctionBegin;
4124   if (i[0]) {
4125     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4126   }
4127   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4128   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4129   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4130   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4131   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4132   PetscFunctionReturn(0);
4133 }
4134