xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7d8118323a4b084c8e4ece7cc84f635667bda1e6)

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
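/*
   Computes the entry of maximum absolute value in each local row by taking
   the elementwise maximum over the diagonal block (a->A) and the
   off-diagonal block (a->B); off-diagonal block-column indices are mapped
   back to global point indices through a->garray.
*/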
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
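/*
   Illustrative sketch of the colmap convention (the numbers here are
   examples, not taken from this file): with bs = 2 and off-diagonal block
   columns garray[] = {3,7}, the non-CTABLE branch below stores

     colmap[3] = 0*bs + 1 = 1      colmap[7] = 1*bs + 1 = 3

   i.e. the local point-column offset plus one, so that a stored value of 0
   can mean "this global block column is not present in B".
*/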
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

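/*
   The two macros below inline the insertion of a single scalar into the
   diagonal (A) or off-diagonal (B) SeqBAIJ block.  Each block row is kept
   sorted by block column: a short binary search narrows the window to at
   most three entries, a linear scan finishes the lookup, and if the block
   column is absent a new bs x bs block is created (shifting later entries
   up) unless a->nonew/b->nonew forbids new nonzeros.
*/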
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
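/*
   Point-wise MatSetValues() for MPIBAIJ.  Each (im[i],in[j]) entry falls
   into one of three cases: (1) locally owned row with a column in the
   diagonal block, inserted via MatSetValues_SeqBAIJ_A_Private(); (2)
   locally owned row with an off-process column, translated through colmap
   and inserted into B (disassembling first if the column is new after
   assembly); (3) off-process row, stashed for communication during
   MatAssemblyBegin()/MatAssemblyEnd().
*/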
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
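/*
   Blocked insertion: each logical entry of v is a dense bs x bs block.
   Unless the block is already contiguous in v, it is first packed into the
   scratch buffer baij->barray one bs x bs block at a time.  A hypothetical
   caller with bs = 2, inserting one block in row-oriented order, would
   look like (the names here are illustrative only):

     PetscInt    brow = 0, bcol = 0;
     PetscScalar vals[4] = {1.0, 2.0,
                            3.0, 4.0};   - the rows of the 2x2 block
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
*/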
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If the block is already contiguous in v (n == 1 row-oriented, or m == 1 column-oriented) a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
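/*
   HASH() is a multiplicative hash: the key is multiplied by the fractional
   part of the golden ratio and the fractional part of the product is then
   scaled to the table size.  A worked example with illustrative numbers:
   with size = 11 and key = 25, tmp = 25*0.6180339887 = 15.4508..., so
   HASH = (PetscInt)(11*0.4508...) = 4.
*/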
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
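/*
   NORM_FROBENIUS: sum |a_ij|^2 over both local blocks, MPI_SUM reduce, then
   take the square root.  NORM_1 (max column sum): accumulate per-global-
   column sums of |a_ij| into an array of length cmap->N (not scalable),
   MPI_SUM reduce, take the maximum.  NORM_INFINITY (max row sum): per-row
   sums of |a_ij|, local maximum, MPI_MAX reduce.
*/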
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
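/*
   Collisions are resolved by linear probing: a block at global block
   position (row,col) is stored under key = row*Nbs + col + 1 (the +1 keeps
   every key nonzero, so HT[i] == 0 can mark an empty slot), starting at
   slot HASH(ht_size,key,tmp) and walking forward modulo ht_size until a
   free slot is found.  HD[] holds the address of the corresponding bs2
   scalars inside a->a or b->a.
*/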
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
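/*
   Begins communication of the entries that were stashed for other
   processes during MatSetValues()/MatSetValuesBlocked(): verifies that all
   ranks agree on INSERT_VALUES vs ADD_VALUES, then starts the scatter of
   both the point stash and the block stash.
*/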
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
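/*
   Completes assembly: drains the incoming point- and block-stash messages
   (re-inserting them locally), assembles the diagonal and off-diagonal
   blocks, disassembles if any rank introduced new off-diagonal structure,
   builds the multiply scatter on first assembly, and optionally switches
   MatSetValues() to the hash-table fast path.
*/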
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
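/*
   Writes the matrix in PETSc's standard binary format, expanded to point
   (non-blocked) form: a 4-entry header [MAT_FILE_CLASSID, M, N, total nz],
   then all row lengths, then all column indices, then all numerical
   values, each gathered to process 0 under flow control.  The block size
   is recorded in the .info file as -matload_block_size.
*/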
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen  = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for every row, not just one set per block row, since process 0 does not
     have the information needed to reconstruct them for each row from a block row. This does require more communication, but
     still not more than the communication needed for the nonzero values  */
  nzmax = nz; /* space the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatMult_MPIBAIJ"
1322 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1323 {
1324   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326   PetscInt       nt;
1327 
1328   PetscFunctionBegin;
1329   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1330   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1331   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1332   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
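
/*
   Note: MatMult_MPIBAIJ() above overlaps communication with computation:
   the scatter of the off-process part of xx into a->lvec is started, the
   product with the diagonal block a->A (which needs only local data) is
   computed while the messages are in flight, and only then is the scatter
   completed and the off-diagonal contribution a->B*lvec added in.

   A minimal caller-side sketch (assumes A is an assembled MATMPIBAIJ;
   error checking omitted):

      Vec x,y;
      MatGetVecs(A,&x,&y);
      VecSet(x,1.0);
      MatMult(A,x,y);
      VecDestroy(&x);
      VecDestroy(&y);
*/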
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1342 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1343 {
1344   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346 
1347   PetscFunctionBegin;
1348   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1349   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1350   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1351   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1357 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1358 {
1359   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1360   PetscErrorCode ierr;
1361   PetscBool      merged;
1362 
1363   PetscFunctionBegin;
1364   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1365   /* do nondiagonal part */
1366   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1367   if (!merged) {
1368     /* send it on its way */
1369     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1370     /* do local part */
1371     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1372     /* receive remote parts: note this assumes the values are not actually */
1373     /* inserted in yy until the next line */
1374     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1375   } else {
1376     /* do local part */
1377     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1378     /* send it on its way */
1379     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1380     /* values actually were received in the Begin() but we need to call this nop */
1381     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382   }
1383   PetscFunctionReturn(0);
1384 }
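
/*
   In MatMultTranspose_MPIBAIJ() above the result is formed as

        yy = A_d^T xx + S^T (B_o^T xx)

   where A_d is the diagonal block a->A, B_o the off-diagonal block a->B, and
   S the forward scatter; the reverse scatter with ADD_VALUES applies S^T.
   When the scatter's begin/end are merged, the Begin() already moves the
   data, so the local product must be computed before it; otherwise the local
   product is used to hide the communication, as in MatMult_MPIBAIJ().
*/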
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1388 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1389 {
1390   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1391   PetscErrorCode ierr;
1392 
1393   PetscFunctionBegin;
1394   /* do nondiagonal part */
1395   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1396   /* send it on its way */
1397   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1398   /* do local part */
1399   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1400   /* receive remote parts: note this assumes the values are not actually */
1401   /* inserted in zz until the next line, which is true for this implementation */
1402   /* but may not always be true. */
1403   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 /*
1408   This only works correctly for square matrices where the subblock A->A is the
1409    diagonal block
1410 */
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1413 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1414 {
1415   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
1420   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatScale_MPIBAIJ"
1426 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1427 {
1428   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1429   PetscErrorCode ierr;
1430 
1431   PetscFunctionBegin;
1432   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1433   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1439 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1440 {
1441   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1442   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1443   PetscErrorCode ierr;
1444   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1445   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1446   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1447 
1448   PetscFunctionBegin;
1449   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows may be accessed");
1450   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() already active on this matrix");
1451   mat->getrowactive = PETSC_TRUE;
1452 
1453   if (!mat->rowvalues && (idx || v)) {
1454     /*
1455         allocate enough space to hold information from the longest row.
1456     */
1457     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1458     PetscInt     max = 1,mbs = mat->mbs,tmp;
1459     for (i=0; i<mbs; i++) {
1460       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1461       if (max < tmp) { max = tmp; }
1462     }
1463     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1464   }
1465   lrow = row - brstart;
1466 
1467   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1468   if (!v)   {pvA = 0; pvB = 0;}
1469   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1470   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1471   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1472   nztot = nzA + nzB;
1473 
1474   cmap  = mat->garray;
1475   if (v  || idx) {
1476     if (nztot) {
1477       /* Sort by increasing column numbers, assuming A and B already sorted */
1478       PetscInt imark = -1;
1479       if (v) {
1480         *v = v_p = mat->rowvalues;
1481         for (i=0; i<nzB; i++) {
1482           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1483           else break;
1484         }
1485         imark = i;
1486         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1487         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1488       }
1489       if (idx) {
1490         *idx = idx_p = mat->rowindices;
1491         if (imark > -1) {
1492           for (i=0; i<imark; i++) {
1493             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1494           }
1495         } else {
1496           for (i=0; i<nzB; i++) {
1497             if (cmap[cworkB[i]/bs] < cstart)
1498               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1499             else break;
1500           }
1501           imark = i;
1502         }
1503         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1504     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1505       }
1506     } else {
1507       if (idx) *idx = 0;
1508       if (v)   *v   = 0;
1509     }
1510   }
1511   *nz = nztot;
1512   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1513   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
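
/*
   Caller-side sketch for the row routines above (a sketch only; assumes mat
   is an assembled MATMPIBAIJ and rows stay within this process's ownership
   range; error checking omitted):

      PetscInt          row,ncols,rstart,rend;
      const PetscInt    *cols;
      const PetscScalar *vals;
      MatGetOwnershipRange(mat,&rstart,&rend);
      for (row=rstart; row<rend; row++) {
        MatGetRow(mat,row,&ncols,&cols,&vals);
        ... use ncols, cols[] (global indices), vals[] ...
        MatRestoreRow(mat,row,&ncols,&cols,&vals);
      }

   Only one row may be obtained at a time, which is why getrowactive is
   checked above.
*/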
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1519 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1520 {
1521   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1522 
1523   PetscFunctionBegin;
1524   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1525   baij->getrowactive = PETSC_FALSE;
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1531 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1532 {
1533   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1538   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1544 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1545 {
1546   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1547   Mat            A = a->A,B = a->B;
1548   PetscErrorCode ierr;
1549   PetscReal      isend[5],irecv[5];
1550 
1551   PetscFunctionBegin;
1552   info->block_size     = (PetscReal)matin->rmap->bs;
1553   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1554   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1555   isend[3] = info->memory;  isend[4] = info->mallocs;
1556   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1557   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1558   isend[3] += info->memory;  isend[4] += info->mallocs;
1559   if (flag == MAT_LOCAL) {
1560     info->nz_used      = isend[0];
1561     info->nz_allocated = isend[1];
1562     info->nz_unneeded  = isend[2];
1563     info->memory       = isend[3];
1564     info->mallocs      = isend[4];
1565   } else if (flag == MAT_GLOBAL_MAX) {
1566     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1567     info->nz_used      = irecv[0];
1568     info->nz_allocated = irecv[1];
1569     info->nz_unneeded  = irecv[2];
1570     info->memory       = irecv[3];
1571     info->mallocs      = irecv[4];
1572   } else if (flag == MAT_GLOBAL_SUM) {
1573     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1574     info->nz_used      = irecv[0];
1575     info->nz_allocated = irecv[1];
1576     info->nz_unneeded  = irecv[2];
1577     info->memory       = irecv[3];
1578     info->mallocs      = irecv[4];
1579   } else {
1580     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1581   }
1582   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1583   info->fill_ratio_needed = 0;
1584   info->factor_mallocs    = 0;
1585   PetscFunctionReturn(0);
1586 }
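
/*
   Sketch of querying the statistics gathered above for a matrix A (error
   checking omitted):

      MatInfo info;
      MatGetInfo(A,MAT_GLOBAL_SUM,&info);
      PetscPrintf(((PetscObject)A)->comm,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);
*/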
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1590 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1591 {
1592   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   switch (op) {
1597   case MAT_NEW_NONZERO_LOCATIONS:
1598   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1599   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1600   case MAT_KEEP_NONZERO_PATTERN:
1601   case MAT_NEW_NONZERO_LOCATION_ERR:
1602     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1603     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1604     break;
1605   case MAT_ROW_ORIENTED:
1606     a->roworiented = flg;
1607     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1608     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1609     break;
1610   case MAT_NEW_DIAGONALS:
1611     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1612     break;
1613   case MAT_IGNORE_OFF_PROC_ENTRIES:
1614     a->donotstash = flg;
1615     break;
1616   case MAT_USE_HASH_TABLE:
1617     a->ht_flag = flg;
1618     break;
1619   case MAT_SYMMETRIC:
1620   case MAT_STRUCTURALLY_SYMMETRIC:
1621   case MAT_HERMITIAN:
1622   case MAT_SYMMETRY_ETERNAL:
1623     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1624     break;
1625   default:
1626     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"Unknown option %d",op);
1627   }
1628   PetscFunctionReturn(0);
1629 }
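
/*
   Example of the hash-table branch above (a sketch): MAT_USE_HASH_TABLE takes
   effect at the next assembly, so set it before inserting values,

      MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);

   while MAT_IGNORE_OFF_PROC_ENTRIES makes this process silently drop entries
   it generates for rows owned by other processes.
*/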
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1633 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1634 {
1635   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1636   Mat_SeqBAIJ    *Aloc;
1637   Mat            B;
1638   PetscErrorCode ierr;
1639   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1640   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1641   MatScalar      *a;
1642 
1643   PetscFunctionBegin;
1644   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1645   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1646     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1647     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1648     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1649     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1650   } else {
1651     B = *matout;
1652   }
1653 
1654   /* copy over the A part */
1655   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1656   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1657   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1658 
1659   for (i=0; i<mbs; i++) {
1660     rvals[0] = bs*(baij->rstartbs + i);
1661     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1662     for (j=ai[i]; j<ai[i+1]; j++) {
1663       col = (baij->cstartbs+aj[j])*bs;
1664       for (k=0; k<bs; k++) {
1665         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1666         col++; a += bs;
1667       }
1668     }
1669   }
1670   /* copy over the B part */
1671   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1672   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1673   for (i=0; i<mbs; i++) {
1674     rvals[0] = bs*(baij->rstartbs + i);
1675     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1676     for (j=ai[i]; j<ai[i+1]; j++) {
1677       col = baij->garray[aj[j]]*bs;
1678       for (k=0; k<bs; k++) {
1679         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1680         col++; a += bs;
1681       }
1682     }
1683   }
1684   ierr = PetscFree(rvals);CHKERRQ(ierr);
1685   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687 
1688   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1689     *matout = B;
1690   } else {
1691     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1692   }
1693   PetscFunctionReturn(0);
1694 }
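
/*
   Usage sketch for the transpose above (error checking omitted):

      Mat At;
      MatTranspose(A,MAT_INITIAL_MATRIX,&At);      new matrix holding A^T
      MatTranspose(A,MAT_REUSE_MATRIX,&A);         in-place, square A only

   Note the preallocation above passes zero nonzeros per row, so each
   MatSetValues_MPIBAIJ() call may allocate; this implementation favors
   simplicity over speed.
*/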
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1698 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1699 {
1700   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1701   Mat            a = baij->A,b = baij->B;
1702   PetscErrorCode ierr;
1703   PetscInt       s1,s2,s3;
1704 
1705   PetscFunctionBegin;
1706   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1707   if (rr) {
1708     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1709     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1710     /* Overlap communication with computation. */
1711     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1712   }
1713   if (ll) {
1714     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1715     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1716     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1717   }
1718   /* scale the diagonal block */
1719   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1720 
1721   if (rr) {
1722     /* Do a scatter end and then right scale the off-diagonal block */
1723     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1724     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1725   }
1726 
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1732 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1733 {
1734   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1735   PetscErrorCode    ierr;
1736   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1737   PetscInt          i,*owners = A->rmap->range;
1738   PetscInt          *nprocs,j,idx,nsends,row;
1739   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1740   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1741   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1742   MPI_Comm          comm = ((PetscObject)A)->comm;
1743   MPI_Request       *send_waits,*recv_waits;
1744   MPI_Status        recv_status,*send_status;
1745   const PetscScalar *xx;
1746   PetscScalar       *bb;
1747 #if defined(PETSC_DEBUG)
1748   PetscBool         found = PETSC_FALSE;
1749 #endif
1750 
1751   PetscFunctionBegin;
1752   /*  first count number of contributors to each processor */
1753   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1754   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1756   j = 0;
1757   for (i=0; i<N; i++) {
1758     if (lastidx > (idx = rows[i])) j = 0;
1759     lastidx = idx;
1760     for (; j<size; j++) {
1761       if (idx >= owners[j] && idx < owners[j+1]) {
1762         nprocs[2*j]++;
1763         nprocs[2*j+1] = 1;
1764         owner[i] = j;
1765 #if defined(PETSC_DEBUG)
1766         found = PETSC_TRUE;
1767 #endif
1768         break;
1769       }
1770     }
1771 #if defined(PETSC_DEBUG)
1772     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1773     found = PETSC_FALSE;
1774 #endif
1775   }
1776   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1777 
1778   if (A->nooffproczerorows) {
1779     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off-process zero row");
1780     nrecvs = nsends;
1781     nmax   = N;
1782   } else {
1783     /* inform other processors of number of messages and max length */
1784     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1785   }
1786 
1787   /* post receives:   */
1788   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1789   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1790   for (i=0; i<nrecvs; i++) {
1791     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1792   }
1793 
1794   /* do sends:
1795      1) starts[i] gives the starting index in svalues for stuff going to
1796      the ith processor
1797   */
1798   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1799   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1800   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1801   starts[0]  = 0;
1802   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1803   for (i=0; i<N; i++) {
1804     svalues[starts[owner[i]]++] = rows[i];
1805   }
1806 
1807   starts[0] = 0;
1808   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1809   count = 0;
1810   for (i=0; i<size; i++) {
1811     if (nprocs[2*i+1]) {
1812       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1813     }
1814   }
1815   ierr = PetscFree(starts);CHKERRQ(ierr);
1816 
1817   base = owners[rank];
1818 
1819   /*  wait on receives */
1820   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1821   count  = nrecvs;
1822   slen = 0;
1823   while (count) {
1824     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1825     /* unpack receives into our local space */
1826     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1827     source[imdex]  = recv_status.MPI_SOURCE;
1828     lens[imdex]    = n;
1829     slen          += n;
1830     count--;
1831   }
1832   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1833 
1834   /* move the data into the send scatter */
1835   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1836   count = 0;
1837   for (i=0; i<nrecvs; i++) {
1838     values = rvalues + i*nmax;
1839     for (j=0; j<lens[i]; j++) {
1840       lrows[count++] = values[j] - base;
1841     }
1842   }
1843   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1844   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1845   ierr = PetscFree(owner);CHKERRQ(ierr);
1846   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1847 
1848   /* fix right hand side if needed */
1849   if (x && b) {
1850     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1851     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1852     for (i=0; i<slen; i++) {
1853       bb[lrows[i]] = diag*xx[lrows[i]];
1854     }
1855     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1856     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1857   }
1858 
1859   /* actually zap the local rows */
1860   /*
1861         Zero the required rows. If the "diagonal block" of the matrix
1862      is square and the user wishes to set the diagonal, we use separate
1863      code so that MatSetValues() is not called for each diagonal entry;
1864      each such call could allocate new memory, triggering many mallocs
1865      and slowing things down.
1866   */
1867   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1868   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1869   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1870     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1871   } else if (diag != 0.0) {
1872     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1873     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1874        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1875     for (i=0; i<slen; i++) {
1876       row  = lrows[i] + rstart_bs;
1877       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1878     }
1879     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1881   } else {
1882     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1883   }
1884 
1885   ierr = PetscFree(lrows);CHKERRQ(ierr);
1886 
1887   /* wait on sends */
1888   if (nsends) {
1889     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1890     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1891     ierr = PetscFree(send_status);CHKERRQ(ierr);
1892   }
1893   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1894   ierr = PetscFree(svalues);CHKERRQ(ierr);
1895 
1896   PetscFunctionReturn(0);
1897 }
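
/*
   The routine above is the classic PETSc rendezvous pattern: each process
   bins the global row indices by owner, PetscMaxSum() tells every process
   how many messages to expect and the maximum length, and the row lists are
   exchanged with nonblocking MPI_Isend()/MPI_Irecv() before the owners zero
   their local rows.

   Caller-side sketch (zero two global rows, keep 1.0 on the diagonal, and
   adjust b so a known x stays a solution; error checking omitted):

      PetscInt rows[2] = {0,5};
      MatZeroRows(A,2,rows,1.0,x,b);
*/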
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1901 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1902 {
1903   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatEqual_MPIBAIJ"
1915 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1916 {
1917   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1918   Mat            a,b,c,d;
1919   PetscBool      flg;
1920   PetscErrorCode ierr;
1921 
1922   PetscFunctionBegin;
1923   a = matA->A; b = matA->B;
1924   c = matB->A; d = matB->B;
1925 
1926   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1927   if (flg) {
1928     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1929   }
1930   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatCopy_MPIBAIJ"
1936 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1937 {
1938   PetscErrorCode ierr;
1939   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1940   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1941 
1942   PetscFunctionBegin;
1943   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1944   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1945     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1946   } else {
1947     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1948     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1949   }
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1955 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1956 {
1957   PetscErrorCode ierr;
1958 
1959   PetscFunctionBegin;
1960   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1966 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1967 {
1968   PetscErrorCode ierr;
1969   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1970   PetscBLASInt   bnz,one=1;
1971   Mat_SeqBAIJ    *x,*y;
1972 
1973   PetscFunctionBegin;
1974   if (str == SAME_NONZERO_PATTERN) {
1975     PetscScalar alpha = a;
1976     x = (Mat_SeqBAIJ *)xx->A->data;
1977     y = (Mat_SeqBAIJ *)yy->A->data;
1978     bnz = PetscBLASIntCast(x->nz*x->bs2); /* nz counts blocks; the BLAS call needs the scalar length */
1979     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1980     x = (Mat_SeqBAIJ *)xx->B->data;
1981     y = (Mat_SeqBAIJ *)yy->B->data;
1982     bnz = PetscBLASIntCast(x->nz*x->bs2);
1983     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1984   } else {
1985     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1986   }
1987   PetscFunctionReturn(0);
1988 }
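
/*
   With SAME_NONZERO_PATTERN the two matrices store their (block) nonzeros in
   identical order, so Y = Y + a*X above reduces to one BLAS axpy per part
   over the raw value arrays of length bs2*nz:

        y->a[i] += alpha*x->a[i],   i = 0,...,bs2*nz-1

   Any other MatStructure flag falls back to the generic MatAXPY_Basic().
*/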
1989 
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1992 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1993 {
1994   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1995   PetscInt rbs,cbs;
1996   PetscErrorCode ierr;
1997 
1998   PetscFunctionBegin;
1999   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2000   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2001   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2002   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2003   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d but the BAIJ row block size is already %d",bs,rbs);
2004   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d but the BAIJ column block size is already %d",bs,cbs);
2005   PetscFunctionReturn(0);
2006 }
2007 
2008 #undef __FUNCT__
2009 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2010 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2011 {
2012   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2013   PetscErrorCode ierr;
2014 
2015   PetscFunctionBegin;
2016   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2017   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2023 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2024 {
2025   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2026   PetscErrorCode ierr;
2027 
2028   PetscFunctionBegin;
2029   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2030   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2036 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2037 {
2038   PetscErrorCode ierr;
2039   IS             iscol_local;
2040   PetscInt       csize;
2041 
2042   PetscFunctionBegin;
2043   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2044   if (call == MAT_REUSE_MATRIX) {
2045     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2046     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2047   } else {
2048     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2049   }
2050   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2051   if (call == MAT_INITIAL_MATRIX) {
2052     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2053     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2054   }
2055   PetscFunctionReturn(0);
2056 }
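
/*
   Usage sketch for the extraction above (error checking omitted); the
   gathered column IS is composed on the submatrix so a later call with
   MAT_REUSE_MATRIX can recover it:

      Mat sub;
      MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
      ... change values in mat, keeping the same nonzero pattern ...
      MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);
*/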
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2060 /*
2061     Not great since it makes two copies of the submatrix: first a sequential
2062   SeqBAIJ on each process, and then the end result obtained by concatenating
2063   the local matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2064 */
2065 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2066 {
2067   PetscErrorCode ierr;
2068   PetscMPIInt    rank,size;
2069   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2070   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2071   Mat            *local,M,Mreuse;
2072   MatScalar      *vwork,*aa;
2073   MPI_Comm       comm = ((PetscObject)mat)->comm;
2074   Mat_SeqBAIJ    *aij;
2075 
2076 
2077   PetscFunctionBegin;
2078   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2079   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2080 
2081   if (call ==  MAT_REUSE_MATRIX) {
2082     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2083     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2084     local = &Mreuse;
2085     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2086   } else {
2087     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2088     Mreuse = *local;
2089     ierr   = PetscFree(local);CHKERRQ(ierr);
2090   }
2091 
2092   /*
2093       m - number of local rows
2094       n - number of columns (same on all processors)
2095       rstart - first row in new global matrix generated
2096   */
2097   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2098   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2099   m    = m/bs;
2100   n    = n/bs;
2101 
2102   if (call == MAT_INITIAL_MATRIX) {
2103     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2104     ii  = aij->i;
2105     jj  = aij->j;
2106 
2107     /*
2108         Determine the number of non-zeros in the diagonal and off-diagonal
2109         portions of the matrix in order to do correct preallocation
2110     */
2111 
2112     /* first get start and end of "diagonal" columns */
2113     if (csize == PETSC_DECIDE) {
2114       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2115       if (mglobal == n*bs) { /* square matrix */
2116         nlocal = m;
2117       } else {
2118         nlocal = n/size + ((n % size) > rank);
2119       }
2120     } else {
2121       nlocal = csize/bs;
2122     }
2123     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2124     rstart = rend - nlocal;
2125     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2126 
2127     /* next, compute all the lengths */
2128     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2129     olens = dlens + m;
2130     for (i=0; i<m; i++) {
2131       jend = ii[i+1] - ii[i];
2132       olen = 0;
2133       dlen = 0;
2134       for (j=0; j<jend; j++) {
2135         if (*jj < rstart || *jj >= rend) olen++;
2136         else dlen++;
2137         jj++;
2138       }
2139       olens[i] = olen;
2140       dlens[i] = dlen;
2141     }
2142     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2143     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2144     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2145     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2146     ierr = PetscFree(dlens);CHKERRQ(ierr);
2147   } else {
2148     PetscInt ml,nl;
2149 
2150     M = *newmat;
2151     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2152     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2153     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2154     /*
2155          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2156        rather than the slower MatSetValues().
2157     */
2158     M->was_assembled = PETSC_TRUE;
2159     M->assembled     = PETSC_FALSE;
2160   }
2161   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2162   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2163   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2164   ii  = aij->i;
2165   jj  = aij->j;
2166   aa  = aij->a;
2167   for (i=0; i<m; i++) {
2168     row   = rstart/bs + i;
2169     nz    = ii[i+1] - ii[i];
2170     cwork = jj;     jj += nz;
2171     vwork = aa;     aa += nz;
2172     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2173   }
2174 
2175   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2176   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2177   *newmat = M;
2178 
2179   /* save submatrix used in processor for next request */
2180   if (call ==  MAT_INITIAL_MATRIX) {
2181     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2182     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2183   }
2184 
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatPermute_MPIBAIJ"
2190 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2191 {
2192   MPI_Comm       comm,pcomm;
2193   PetscInt       first,local_size,nrows;
2194   const PetscInt *rows;
2195   PetscMPIInt    size;
2196   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2197   PetscErrorCode ierr;
2198 
2199   PetscFunctionBegin;
2200   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2201   /* make a collective version of 'rowp' */
2202   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2203   if (pcomm==comm) {
2204     crowp = rowp;
2205   } else {
2206     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2207     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2208     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2209     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2210   }
2211   /* collect the global row permutation and invert it */
2212   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2213   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2214   if (pcomm!=comm) {
2215     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2216   }
2217   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2218   /* get the local target indices */
2219   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2220   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2221   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2222   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2223   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2224   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2225   /* the column permutation is so much easier;
2226      make a local version of 'colp' and invert it */
2227   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2228   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2229   if (size==1) {
2230     lcolp = colp;
2231   } else {
2232     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2233     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2234     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2235   }
2236   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2237   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2238   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2239   if (size>1) {
2240     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2241     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2242   }
2243   /* now we just get the submatrix */
2244   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2245   /* clean up */
2246   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2247   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
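
/*
   Usage sketch for the permutation above (rowp and colp must be index sets
   that are permutations; error checking omitted):

      Mat Aperm;
      MatPermute(A,rowp,colp,&Aperm);

   Internally this reduces to MatGetSubMatrix_MPIBAIJ_Private() applied with
   the inverted permutations.
*/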
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2253 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2254 {
2255   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2256   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2257 
2258   PetscFunctionBegin;
2259   if (nghosts) { *nghosts = B->nbs;}
2260   if (ghosts) {*ghosts = baij->garray;}
2261   PetscFunctionReturn(0);
2262 }
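
/*
   The ghosts returned above are the global block indices of the off-process
   columns (garray). Sketch (error checking omitted):

      PetscInt       nghosts;
      const PetscInt *ghosts;
      MatGetGhosts(mat,&nghosts,&ghosts);
*/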
2263 
2264 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2268 /*
2269     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2270 */
2271 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2272 {
2273   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2274   PetscErrorCode        ierr;
2275   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2276   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2277   const PetscInt        *is;
2278   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2279   PetscInt              *rowhit,M,cstart,cend,colb;
2280   PetscInt              *columnsforrow,l;
2281   IS                    *isa;
2282   PetscBool              done,flg;
2283   ISLocalToGlobalMapping map = mat->cbmapping;
2284   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL),ctype=c->ctype;
2285 
2286   PetscFunctionBegin;
2287   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2288   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2289 
2290   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2291   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2292   M                = mat->rmap->n/bs;
2293   cstart           = mat->cmap->rstart/bs;
2294   cend             = mat->cmap->rend/bs;
2295   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2296   c->N             = mat->cmap->N/bs;
2297   c->m             = mat->rmap->n/bs;
2298   c->rstart        = mat->rmap->rstart/bs;
2299 
2300   c->ncolors       = nis;
2301   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2302   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2303   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2305   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2306   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2307 
2308   /* Allow access to data structures of local part of matrix */
2309   if (!baij->colmap) {
2310     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2311   }
2312   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2313   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2314 
2315   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2316   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2317 
2318   for (i=0; i<nis; i++) {
2319     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2320     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2321     c->ncolumns[i] = n;
2322     if (n) {
2323       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2324       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2325       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2326     } else {
2327       c->columns[i]  = 0;
2328     }
2329 
2330     if (ctype == IS_COLORING_GLOBAL){
2331       /* Determine the total (parallel) number of columns of this color */
2332       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2333       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2334 
2335       nn   = PetscMPIIntCast(n);
2336       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2337       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2338       if (!nctot) {
2339         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2340       }
2341 
2342       disp[0] = 0;
2343       for (j=1; j<size; j++) {
2344         disp[j] = disp[j-1] + ncolsonproc[j-1];
2345       }
2346 
2347       /* Get complete list of columns for color on each processor */
2348       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2349       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2350       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2351     } else if (ctype == IS_COLORING_GHOSTED){
2352       /* Determine local number of columns of this color on this process, including ghost points */
2353       nctot = n;
2354       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2355       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2356     } else {
2357       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2358     }
2359 
2360     /*
2361        Mark all rows affected by these columns
2362     */
2363     /* Temporary option to allow for debugging/testing */
2364     flg  = PETSC_FALSE;
2365     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2366     if (!flg) {/*-----------------------------------------------------------------------------*/
2367       /* crude, fast version */
2368       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2369       /* loop over columns */
2370       for (j=0; j<nctot; j++) {
2371         if (ctype == IS_COLORING_GHOSTED) {
2372           col = ltog[cols[j]];
2373         } else {
2374           col  = cols[j];
2375         }
2376         if (col >= cstart && col < cend) {
2377           /* column is in diagonal block of matrix */
2378           rows = A_cj + A_ci[col-cstart];
2379           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2380         } else {
2381 #if defined (PETSC_USE_CTABLE)
2382           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2383           colb--;
2384 #else
2385           colb = baij->colmap[col] - 1;
2386 #endif
2387           if (colb == -1) {
2388             m = 0;
2389           } else {
2390             colb = colb/bs;
2391             rows = B_cj + B_ci[colb];
2392             m    = B_ci[colb+1] - B_ci[colb];
2393           }
2394         }
2395         /* mark the rows of this column in rowhit */
2396         for (k=0; k<m; k++) {
2397           rowhit[*rows++] = col + 1;
2398         }
2399       }
2400 
2401       /* count the number of hits */
2402       nrows = 0;
2403       for (j=0; j<M; j++) {
2404         if (rowhit[j]) nrows++;
2405       }
2406       c->nrows[i]         = nrows;
2407       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2408       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2409       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2410       nrows = 0;
2411       for (j=0; j<M; j++) {
2412         if (rowhit[j]) {
2413           c->rows[i][nrows]           = j;
2414           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2415           nrows++;
2416         }
2417       }
2418     } else {/*-------------------------------------------------------------------------------*/
2419       /* slow version, using rowhit as a linked list */
2420       PetscInt currentcol,fm,mfm;
2421       rowhit[M] = M;
2422       nrows     = 0;
2423       /* loop over columns */
2424       for (j=0; j<nctot; j++) {
2425         if (ctype == IS_COLORING_GHOSTED) {
2426           col = ltog[cols[j]];
2427         } else {
2428           col  = cols[j];
2429         }
2430         if (col >= cstart && col < cend) {
2431           /* column is in diagonal block of matrix */
2432           rows = A_cj + A_ci[col-cstart];
2433           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2434         } else {
2435 #if defined (PETSC_USE_CTABLE)
2436           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2437           colb--;
2438 #else
2439           colb = baij->colmap[col] - 1;
2440 #endif
2441           if (colb == -1) {
2442             m = 0;
2443           } else {
2444             colb = colb/bs;
2445             rows = B_cj + B_ci[colb];
2446             m    = B_ci[colb+1] - B_ci[colb];
2447           }
2448         }
2449 
2450         /* mark the rows of this column in the rowhit linked list */
2451         fm    = M; /* fm points to first entry in linked list */
2452         for (k=0; k<m; k++) {
2453           currentcol = *rows++;
2454 	  /* is it already in the list? */
2455           /* is it already in the list? */
2456             mfm  = fm;
2457             fm   = rowhit[fm];
2458           } while (fm < currentcol);
2459           /* not in list so add it */
2460           if (fm != currentcol) {
2461             nrows++;
2462             columnsforrow[currentcol] = col;
2463             /* next three lines insert new entry into linked list */
2464             rowhit[mfm]               = currentcol;
2465             rowhit[currentcol]        = fm;
2466             fm                        = currentcol;
2467             /* fm points to present position in list since we know the columns are sorted */
2468           } else {
2469             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2470           }
2471         }
2472       }
2473       c->nrows[i]         = nrows;
2474       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2475       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2476       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2477       /* now store the linked list of rows into c->rows[i] */
2478       nrows = 0;
2479       fm    = rowhit[M];
2480       do {
2481         c->rows[i][nrows]            = fm;
2482         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2483         fm                           = rowhit[fm];
2484       } while (fm < M);
2485     } /* ---------------------------------------------------------------------------------------*/
2486     ierr = PetscFree(cols);CHKERRQ(ierr);
2487   }
2488 
2489   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2490   /*
2491        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor scalings
2492   */
2493   if (ctype == IS_COLORING_GLOBAL) {
2494     PetscInt *garray;
2495     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2496     for (i=0; i<baij->B->cmap->n/bs; i++) {
2497       for (j=0; j<bs; j++) {
2498         garray[i*bs+j] = bs*baij->garray[i]+j;
2499       }
2500     }
2501     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2502     ierr = PetscFree(garray);CHKERRQ(ierr);
2503     CHKMEMQ;
2504     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2505     for (k=0; k<c->ncolors; k++) {
2506       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2507       for (l=0; l<c->nrows[k]; l++) {
2508         col = c->columnsforrow[k][l];
2509         if (col >= cstart && col < cend) {
2510           /* column is in diagonal block of matrix */
2511           colb = col - cstart;
2512         } else {
2513           /* column  is in "off-processor" part */
2514 #if defined (PETSC_USE_CTABLE)
2515           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2516           colb--;
2517 #else
2518           colb = baij->colmap[col] - 1;
2519 #endif
2520           colb = colb/bs;
2521           colb += cend - cstart;
2522         }
2523         c->vscaleforrow[k][l] = colb;
2524       }
2525     }
2526   } else if (ctype == IS_COLORING_GHOSTED) {
2527     /* Get gtol mapping */
2528     PetscInt N = mat->cmap->N, *gtol;
2529     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2530     for (i=0; i<N; i++) gtol[i] = -1;
2531     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2532 
2533     c->vscale = 0; /* will be created in MatFDColoringApply() */
2534     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2535     for (k=0; k<c->ncolors; k++) {
2536       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2537       for (l=0; l<c->nrows[k]; l++) {
2538         col = c->columnsforrow[k][l];      /* global column index */
2539         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2540       }
2541     }
2542     ierr = PetscFree(gtol);CHKERRQ(ierr);
2543   }
2544   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2545 
2546   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2547   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2548   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2549   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2550   CHKMEMQ;
2551   PetscFunctionReturn(0);
2552 }
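
/*
   Typical driver for the setup above (a sketch; assumes func computes the
   nonlinear function being differenced and ctx is its user context; error
   checking omitted):

      ISColoring    iscoloring;
      MatFDColoring color;
      MatGetColoring(J,MATCOLORINGSL,&iscoloring);
      MatFDColoringCreate(J,iscoloring,&color);
      MatFDColoringSetFunction(color,(PetscErrorCode (*)(void))func,ctx);
      MatFDColoringSetFromOptions(color);
      ISColoringDestroy(&iscoloring);

   The -matfdcoloring_slow option tested above switches the row marking from
   the dense rowhit[] array to the linked-list code path for debugging.
*/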
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2556 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2557 {
2558   Mat            B;
2559   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2560   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2561   Mat_SeqAIJ     *b;
2562   PetscErrorCode ierr;
2563   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2564   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2565   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2566 
2567   PetscFunctionBegin;
2568   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2569   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2570 
2571   /* ----------------------------------------------------------------
2572      Tell every processor the number of nonzeros per row
2573   */
2574   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2575   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2576     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2577   }
2578   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2579   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2580   displs     = recvcounts + size;
2581   for (i=0; i<size; i++) {
2582     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2583     displs[i]     = A->rmap->range[i]/bs;
2584   }
2585 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2586   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2587 #else
2588   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2589 #endif
2590   /* ---------------------------------------------------------------
2591      Create the sequential matrix of the same type as the local block diagonal
2592   */
2593   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2594   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2595   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2596   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2597   b = (Mat_SeqAIJ *)B->data;
2598 
2599   /*--------------------------------------------------------------------
2600     Copy my part of matrix column indices over
2601   */
2602   sendcount  = ad->nz + bd->nz;
2603   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2604   a_jsendbuf = ad->j;
2605   b_jsendbuf = bd->j;
2606   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2607   cnt        = 0;
2608   for (i=0; i<n; i++) {
2609 
2610     /* put in lower diagonal portion */
2611     m = bd->i[i+1] - bd->i[i];
2612     while (m > 0) {
2613       /* is it above diagonal (in bd (compressed) numbering) */
2614       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2615       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2616       m--;
2617     }
2618 
2619     /* put in diagonal portion */
2620     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2621       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2622     }
2623 
2624     /* put in upper diagonal portion */
2625     while (m-- > 0) {
2626       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2627     }
2628   }
2629   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2630 
2631   /*--------------------------------------------------------------------
2632     Gather all column indices to all processors
2633   */
2634   for (i=0; i<size; i++) {
2635     recvcounts[i] = 0;
2636     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2637       recvcounts[i] += lens[j];
2638     }
2639   }
2640   displs[0]  = 0;
2641   for (i=1; i<size; i++) {
2642     displs[i] = displs[i-1] + recvcounts[i-1];
2643   }
2644 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2645   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2646 #else
2647   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2648 #endif
2649   /*--------------------------------------------------------------------
2650     Assemble the matrix into usable form (note the numerical values are not yet set)
2651   */
2652   /* set the b->ilen (length of each row) values */
2653   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2654   /* set the b->i indices */
2655   b->i[0] = 0;
2656   for (i=1; i<=A->rmap->N/bs; i++) {
2657     b->i[i] = b->i[i-1] + lens[i-1];
2658   }
2659   ierr = PetscFree(lens);CHKERRQ(ierr);
2660   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2661   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2662   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2663 
2664   if (A->symmetric){
2665     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2666   } else if (A->hermitian) {
2667     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2668   } else if (A->structurally_symmetric) {
2669     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2670   }
2671   *newmat = B;
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 #undef __FUNCT__
2676 #define __FUNCT__ "MatSOR_MPIBAIJ"
2677 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2678 {
2679   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2680   PetscErrorCode ierr;
2681   Vec            bb1 = 0;
2682 
2683   PetscFunctionBegin;
2684   if (flag == SOR_APPLY_UPPER) {
2685     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2686     PetscFunctionReturn(0);
2687   }
2688
2689   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2690     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2691   }
2692 
2693   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2694     if (flag & SOR_ZERO_INITIAL_GUESS) {
2695       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2696       its--;
2697     }
2698 
2699     while (its--) {
2700       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2701       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2702 
2703       /* update rhs: bb1 = bb - B*x */
2704       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2705       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2706 
2707       /* local sweep */
2708       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2709     }
2710   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2711     if (flag & SOR_ZERO_INITIAL_GUESS) {
2712       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2713       its--;
2714     }
2715     while (its--) {
2716       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718 
2719       /* update rhs: bb1 = bb - B*x */
2720       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2721       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2722 
2723       /* local sweep */
2724       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2725     }
2726   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2727     if (flag & SOR_ZERO_INITIAL_GUESS) {
2728       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2729       its--;
2730     }
2731     while (its--) {
2732       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2733       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2734 
2735       /* update rhs: bb1 = bb - B*x */
2736       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2737       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2738 
2739       /* local sweep */
2740       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2741     }
2742   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR requested with a flag combination that is not supported");
2743 
2744   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
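/*
    A minimal usage sketch (hypothetical A, b, and x, not defined in this file):
    one iteration of the local symmetric sweep handled above, started from a
    zero initial guess, reached through the public MatSOR() interface:

      ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/
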
2748 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2749 
2750 
2751 /* -------------------------------------------------------------------*/
2752 static struct _MatOps MatOps_Values = {
2753        MatSetValues_MPIBAIJ,
2754        MatGetRow_MPIBAIJ,
2755        MatRestoreRow_MPIBAIJ,
2756        MatMult_MPIBAIJ,
2757 /* 4*/ MatMultAdd_MPIBAIJ,
2758        MatMultTranspose_MPIBAIJ,
2759        MatMultTransposeAdd_MPIBAIJ,
2760        0,
2761        0,
2762        0,
2763 /*10*/ 0,
2764        0,
2765        0,
2766        MatSOR_MPIBAIJ,
2767        MatTranspose_MPIBAIJ,
2768 /*15*/ MatGetInfo_MPIBAIJ,
2769        MatEqual_MPIBAIJ,
2770        MatGetDiagonal_MPIBAIJ,
2771        MatDiagonalScale_MPIBAIJ,
2772        MatNorm_MPIBAIJ,
2773 /*20*/ MatAssemblyBegin_MPIBAIJ,
2774        MatAssemblyEnd_MPIBAIJ,
2775        MatSetOption_MPIBAIJ,
2776        MatZeroEntries_MPIBAIJ,
2777 /*24*/ MatZeroRows_MPIBAIJ,
2778        0,
2779        0,
2780        0,
2781        0,
2782 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2783        0,
2784        0,
2785        0,
2786        0,
2787 /*34*/ MatDuplicate_MPIBAIJ,
2788        0,
2789        0,
2790        0,
2791        0,
2792 /*39*/ MatAXPY_MPIBAIJ,
2793        MatGetSubMatrices_MPIBAIJ,
2794        MatIncreaseOverlap_MPIBAIJ,
2795        MatGetValues_MPIBAIJ,
2796        MatCopy_MPIBAIJ,
2797 /*44*/ 0,
2798        MatScale_MPIBAIJ,
2799        0,
2800        0,
2801        0,
2802 /*49*/ MatSetBlockSize_MPIBAIJ,
2803        0,
2804        0,
2805        0,
2806        0,
2807 /*54*/ MatFDColoringCreate_MPIBAIJ,
2808        0,
2809        MatSetUnfactored_MPIBAIJ,
2810        MatPermute_MPIBAIJ,
2811        MatSetValuesBlocked_MPIBAIJ,
2812 /*59*/ MatGetSubMatrix_MPIBAIJ,
2813        MatDestroy_MPIBAIJ,
2814        MatView_MPIBAIJ,
2815        0,
2816        0,
2817 /*64*/ 0,
2818        0,
2819        0,
2820        0,
2821        0,
2822 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2823        0,
2824        0,
2825        0,
2826        0,
2827 /*74*/ 0,
2828        MatFDColoringApply_BAIJ,
2829        0,
2830        0,
2831        0,
2832 /*79*/ 0,
2833        0,
2834        0,
2835        0,
2836        MatLoad_MPIBAIJ,
2837 /*84*/ 0,
2838        0,
2839        0,
2840        0,
2841        0,
2842 /*89*/ 0,
2843        0,
2844        0,
2845        0,
2846        0,
2847 /*94*/ 0,
2848        0,
2849        0,
2850        0,
2851        0,
2852 /*99*/ 0,
2853        0,
2854        0,
2855        0,
2856        0,
2857 /*104*/0,
2858        MatRealPart_MPIBAIJ,
2859        MatImaginaryPart_MPIBAIJ,
2860        0,
2861        0,
2862 /*109*/0,
2863        0,
2864        0,
2865        0,
2866        0,
2867 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2868        0,
2869        MatGetGhosts_MPIBAIJ,
2870        0,
2871        0,
2872 /*119*/0,
2873        0,
2874        0,
2875        0
2876 };
2877 
2878 EXTERN_C_BEGIN
2879 #undef __FUNCT__
2880 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2881 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
2882 {
2883   PetscFunctionBegin;
2884   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2885   *iscopy = PETSC_FALSE;
2886   PetscFunctionReturn(0);
2887 }
2888 EXTERN_C_END
2889 
2890 EXTERN_C_BEGIN
2891 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2892 EXTERN_C_END
2893 
2894 EXTERN_C_BEGIN
2895 #undef __FUNCT__
2896 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2897 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2898 {
2899   PetscInt       m,rstart,cstart,cend;
2900   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2901   const PetscInt *JJ=0;
2902   PetscScalar    *values=0;
2903   PetscErrorCode ierr;
2904 
2905   PetscFunctionBegin;
2906 
2907   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2908   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2909   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2910   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2911   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2912   m      = B->rmap->n/bs;
2913   rstart = B->rmap->rstart/bs;
2914   cstart = B->cmap->rstart/bs;
2915   cend   = B->cmap->rend/bs;
2916 
2917   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2918   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
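  /* count, for each local block row, how many block columns fall inside the
     diagonal block [cstart,cend) and how many fall outside it; jj is assumed
     to be sorted within each row, as this routine requires */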
2919   for (i=0; i<m; i++) {
2920     nz = ii[i+1] - ii[i];
2921     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2922     nz_max = PetscMax(nz_max,nz);
2923     JJ  = jj + ii[i];
2924     for (j=0; j<nz; j++) {
2925       if (*JJ >= cstart) break;
2926       JJ++;
2927     }
2928     d = 0;
2929     for (; j<nz; j++) {
2930       if (*JJ++ >= cend) break;
2931       d++;
2932     }
2933     d_nnz[i] = d;
2934     o_nnz[i] = nz - d;
2935   }
2936   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2937   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2938 
2939   values = (PetscScalar*)V;
2940   if (!values) {
2941     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2942     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2943   }
2944   for (i=0; i<m; i++) {
2945     PetscInt          row    = i + rstart;
2946     PetscInt          ncols  = ii[i+1] - ii[i];
2947     const PetscInt    *icols = jj + ii[i];
2948     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2949     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2950   }
2951 
2952   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2955 
2956   PetscFunctionReturn(0);
2957 }
2958 EXTERN_C_END
2959 
2960 #undef __FUNCT__
2961 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2962 /*@C
2963    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2964    (block compressed row).
2965 
2966    Collective on MPI_Comm
2967 
2968    Input Parameters:
2969 +  A - the matrix
2970 .  bs - the block size
2971 .  i - the indices into j for the start of each local row (starts with zero)
2972 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2973 -  v - optional values in the matrix
2974 
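   Example (a minimal sketch, assuming block size 2 and two local block rows;
   the CSR arrays are illustrative and the values are supplied later):
.vb
      PetscInt i[] = {0,2,5};      /* i[row] is the offset of each block row in j[] */
      PetscInt j[] = {0,1,0,1,2};  /* block column indices, sorted within each row */
      ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,PETSC_NULL);CHKERRQ(ierr);
.ve
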
2975    Level: developer
2976 
2977 .keywords: matrix, aij, compressed row, sparse, parallel
2978 
2979 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2980 @*/
2981 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2982 {
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 EXTERN_C_BEGIN
2991 #undef __FUNCT__
2992 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2993 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2994 {
2995   Mat_MPIBAIJ    *b;
2996   PetscErrorCode ierr;
2997   PetscInt       i, newbs = PetscAbs(bs);
2998 
2999   PetscFunctionBegin;
3000   if (bs < 0) {
3001     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3002       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3003     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3004     bs   = PetscAbs(bs);
3005   }
3006   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3007   bs = newbs;
3008 
3009 
3010   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3011   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3012   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3013   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3014   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3015 
3016   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3017   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3018   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3019   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3020 
3021   if (d_nnz) {
3022     for (i=0; i<B->rmap->n/bs; i++) {
3023       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3024     }
3025   }
3026   if (o_nnz) {
3027     for (i=0; i<B->rmap->n/bs; i++) {
3028       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3029     }
3030   }
3031 
3032   b = (Mat_MPIBAIJ*)B->data;
3033   b->bs2 = bs*bs;
3034   b->mbs = B->rmap->n/bs;
3035   b->nbs = B->cmap->n/bs;
3036   b->Mbs = B->rmap->N/bs;
3037   b->Nbs = B->cmap->N/bs;
3038 
3039   for (i=0; i<=b->size; i++) {
3040     b->rangebs[i] = B->rmap->range[i]/bs;
3041   }
3042   b->rstartbs = B->rmap->rstart/bs;
3043   b->rendbs   = B->rmap->rend/bs;
3044   b->cstartbs = B->cmap->rstart/bs;
3045   b->cendbs   = B->cmap->rend/bs;
3046 
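    /* b->A holds the square diagonal block and b->B the off-diagonal part,
       which keeps the full global column width until assembly compresses it;
       both are sequential matrices on PETSC_COMM_SELF */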
3047   if (!B->preallocated) {
3048     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3049     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3050     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3051     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3052     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3053     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3054     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3055     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3056     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3057   }
3058 
3059   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3060   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3061   B->preallocated = PETSC_TRUE;
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 EXTERN_C_BEGIN
3067 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3068 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3069 EXTERN_C_END
3070 
3071 
3072 EXTERN_C_BEGIN
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3075 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3076 {
3077   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3078   PetscErrorCode ierr;
3079   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3080   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3081   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3082 
3083   PetscFunctionBegin;
3084   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3085   ii[0] = 0;
3086   CHKMEMQ;
3087   for (i=0; i<M; i++) {
3088     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3089     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3090     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3091     /* remove one from the count if this row of the diagonal block contains its diagonal entry */
3092     for (j=id[i]; j<id[i+1]; j++) {
3093       if (jd[j] == i) {ii[i+1]--;break;}
3094     }
3095   CHKMEMQ;
3096   }
3097   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3098   cnt = 0;
3099   for (i=0; i<M; i++) {
3100     for (j=io[i]; j<io[i+1]; j++) {
3101       if (garray[jo[j]] > rstart) break;
3102       jj[cnt++] = garray[jo[j]];
3103   CHKMEMQ;
3104     }
3105     for (k=id[i]; k<id[i+1]; k++) {
3106       if (jd[k] != i) {
3107         jj[cnt++] = rstart + jd[k];
3108   CHKMEMQ;
3109       }
3110     }
3111     for (;j<io[i+1]; j++) {
3112       jj[cnt++] = garray[jo[j]];
3113   CHKMEMQ;
3114     }
3115   }
3116   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3117   PetscFunctionReturn(0);
3118 }
3119 EXTERN_C_END
3120 
3121 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3122 EXTERN_C_BEGIN
3123 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3124 EXTERN_C_END
3125 
3126 EXTERN_C_BEGIN
3127 #undef __FUNCT__
3128 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3129 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3130 {
3131   PetscErrorCode ierr;
3132   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3133   Mat            B;
3134   Mat_MPIAIJ     *b;
3135 
3136   PetscFunctionBegin;
3137   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3138 
3139   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3140   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3141   ierr = MatSetType(B,newtype);CHKERRQ(ierr);
3142   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3143   b = (Mat_MPIAIJ*) B->data;
3144 
3145   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3146   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3147   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3148   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3149   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3150   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3151   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3152   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3154   if (reuse == MAT_REUSE_MATRIX) {
3155     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3156   } else {
3157    *newmat = B;
3158   }
3159   PetscFunctionReturn(0);
3160 }
3161 EXTERN_C_END
3162 
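/*
    A minimal usage sketch (hypothetical A and B, not defined in this file):
    the converter above is normally reached through the generic interface:

      ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/
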
3163 EXTERN_C_BEGIN
3164 #if defined(PETSC_HAVE_MUMPS)
3165 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3166 #endif
3167 EXTERN_C_END
3168 
3169 /*MC
3170    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3171 
3172    Options Database Keys:
3173 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3174 . -mat_block_size <bs> - set the blocksize used to store the matrix
3175 - -mat_use_hash_table <fact> - use a hash table during assembly; fact is the factor used to size the table
3176 
3177   Level: beginner
3178 
3179 .seealso: MatCreateMPIBAIJ()
3180 M*/
3181 
3182 EXTERN_C_BEGIN
3183 #undef __FUNCT__
3184 #define __FUNCT__ "MatCreate_MPIBAIJ"
3185 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3186 {
3187   Mat_MPIBAIJ    *b;
3188   PetscErrorCode ierr;
3189   PetscBool      flg;
3190 
3191   PetscFunctionBegin;
3192   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3193   B->data = (void*)b;
3194 
3195   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3196   B->assembled  = PETSC_FALSE;
3197 
3198   B->insertmode = NOT_SET_VALUES;
3199   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3200   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3201 
3202   /* build local table of row and column ownerships */
3203   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3204 
3205   /* build cache for off array entries formed */
3206   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3207   b->donotstash  = PETSC_FALSE;
3208   b->colmap      = PETSC_NULL;
3209   b->garray      = PETSC_NULL;
3210   b->roworiented = PETSC_TRUE;
3211 
3212   /* stuff used in block assembly */
3213   b->barray       = 0;
3214 
3215   /* stuff used for matrix vector multiply */
3216   b->lvec         = 0;
3217   b->Mvctx        = 0;
3218 
3219   /* stuff for MatGetRow() */
3220   b->rowindices   = 0;
3221   b->rowvalues    = 0;
3222   b->getrowactive = PETSC_FALSE;
3223 
3224   /* hash table stuff */
3225   b->ht           = 0;
3226   b->hd           = 0;
3227   b->ht_size      = 0;
3228   b->ht_flag      = PETSC_FALSE;
3229   b->ht_fact      = 0;
3230   b->ht_total_ct  = 0;
3231   b->ht_insert_ct = 0;
3232 
3233   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3234   b->ijonly       = PETSC_FALSE;
3235 
3236   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3237     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3238     if (flg) {
3239       PetscReal fact = 1.39;
3240       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3241       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3242       if (fact <= 1.0) fact = 1.39;
3243       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3244       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3245     }
3246   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3247 
3248 #if defined(PETSC_HAVE_MUMPS)
3249   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3250 #endif
3251   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3252                                      "MatConvert_MPIBAIJ_MPIAdj",
3253                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3254   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3255                                      "MatConvert_MPIBAIJ_MPIAIJ",
3256                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3257   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3258                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3259                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3260   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3261                                      "MatStoreValues_MPIBAIJ",
3262                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3263   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3264                                      "MatRetrieveValues_MPIBAIJ",
3265                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3266   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3267                                      "MatGetDiagonalBlock_MPIBAIJ",
3268                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3269   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3270                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3271                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3272   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3273 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3274 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3276                                      "MatDiagonalScaleLocal_MPIBAIJ",
3277                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3279                                      "MatSetHashTableFactor_MPIBAIJ",
3280                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3281   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3282   PetscFunctionReturn(0);
3283 }
3284 EXTERN_C_END
3285 
3286 /*MC
3287    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3288 
3289    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3290    and MATMPIBAIJ otherwise.
3291 
3292    Options Database Keys:
3293 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3294 
3295   Level: beginner
3296 
3297 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3298 M*/
3299 
3300 EXTERN_C_BEGIN
3301 #undef __FUNCT__
3302 #define __FUNCT__ "MatCreate_BAIJ"
3303 PetscErrorCode  MatCreate_BAIJ(Mat A)
3304 {
3305   PetscErrorCode ierr;
3306   PetscMPIInt    size;
3307 
3308   PetscFunctionBegin;
3309   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3310   if (size == 1) {
3311     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3312   } else {
3313     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3314   }
3315   PetscFunctionReturn(0);
3316 }
3317 EXTERN_C_END
3318 
3319 #undef __FUNCT__
3320 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3321 /*@C
3322    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3323    (block compressed row).  For good matrix assembly performance
3324    the user should preallocate the matrix storage by setting the parameters
3325    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3326    performance can be increased by more than a factor of 50.
3327 
3328    Collective on Mat
3329 
3330    Input Parameters:
3331 +  A - the matrix
3332 .  bs   - size of block
3333 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3334            submatrix  (same for all local rows)
3335 .  d_nnz - array containing the number of block nonzeros in the various block rows
3336            in the diagonal portion of the local submatrix (possibly different for each block
3337            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3338 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3339            submatrix (same for all local rows).
3340 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3341            off-diagonal portion of the local submatrix (possibly different for
3342            each block row) or PETSC_NULL.
3343 
3344    If the *_nnz parameter is given then the *_nz parameter is ignored
3345 
3346    Options Database Keys:
3347 +   -mat_block_size - size of the blocks to use
3348 -   -mat_use_hash_table <fact> - use a hash table during assembly; fact is the factor used to size the table
3349 
3350    Notes:
3351    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3352    then it must be used on all processors that share the object for that argument.
3353 
3354    Storage Information:
3355    For a square global matrix we define each processor's diagonal portion
3356    to be its local rows and the corresponding columns (a square submatrix);
3357    each processor's off-diagonal portion encompasses the remainder of the
3358    local matrix (a rectangular submatrix).
3359 
3360    The user can specify preallocated storage for the diagonal part of
3361    the local submatrix with either d_nz or d_nnz (not both).  Set
3362    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3363    memory allocation.  Likewise, specify preallocated storage for the
3364    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3365 
3366    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3367    the figure below we depict these three local rows and all columns (0-11).
3368 
3369 .vb
3370            0 1 2 3 4 5 6 7 8 9 10 11
3371           -------------------
3372    row 3  |  o o o d d d o o o o o o
3373    row 4  |  o o o d d d o o o o o o
3374    row 5  |  o o o d d d o o o o o o
3375           -------------------
3376 .ve
3377 
3378    Thus, any entries in the d locations are stored in the d (diagonal)
3379    submatrix, and any entries in the o locations are stored in the
3380    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3381    stored simply in the MATSEQBAIJ format for compressed row storage.
3382 
3383    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3384    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3385    In general, for PDE problems in which most nonzeros are near the diagonal,
3386    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3387    or you will get TERRIBLE performance; see the users' manual chapter on
3388    matrices.
3389 
3390    You can call MatGetInfo() to get information on how effective the preallocation was;
3391    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3392    You can also run with the option -info and look for messages with the string
3393    malloc in them to see if additional memory allocation was needed.
3394 
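   Example (a minimal sketch; the block size and per-block-row counts are illustrative):
.vb
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(B,3,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
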
3395    Level: intermediate
3396 
3397 .keywords: matrix, block, aij, compressed row, sparse, parallel
3398 
3399 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3400 @*/
3401 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3402 {
3403   PetscErrorCode ierr;
3404 
3405   PetscFunctionBegin;
3406   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 #undef __FUNCT__
3411 #define __FUNCT__ "MatCreateMPIBAIJ"
3412 /*@C
3413    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3414    (block compressed row).  For good matrix assembly performance
3415    the user should preallocate the matrix storage by setting the parameters
3416    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3417    performance can be increased by more than a factor of 50.
3418 
3419    Collective on MPI_Comm
3420 
3421    Input Parameters:
3422 +  comm - MPI communicator
3423 .  bs   - size of block
3424 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3425            This value should be the same as the local size used in creating the
3426            y vector for the matrix-vector product y = Ax.
3427 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3428            This value should be the same as the local size used in creating the
3429            x vector for the matrix-vector product y = Ax.
3430 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3431 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3432 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3433            submatrix  (same for all local rows)
3434 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3435            in the diagonal portion of the local submatrix (possibly different for each block
3436            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3437 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3438            submatrix (same for all local rows).
3439 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3440            off-diagonal portion of the local submatrix (possibly different for
3441            each block row) or PETSC_NULL.
3442 
3443    Output Parameter:
3444 .  A - the matrix
3445 
3446    Options Database Keys:
3447 +   -mat_block_size - size of the blocks to use
3448 -   -mat_use_hash_table <fact>
3449 
3450    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3451    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3452    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3453 
3454    Notes:
3455    If the *_nnz parameter is given then the *_nz parameter is ignored
3456 
3457    A nonzero block is any block that has 1 or more nonzeros in it
3458 
3459    The user MUST specify either the local or global matrix dimensions
3460    (possibly both).
3461 
3462    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3463    then it must be used on all processors that share the object for that argument.
3464 
3465    Storage Information:
3466    For a square global matrix we define each processor's diagonal portion
3467    to be its local rows and the corresponding columns (a square submatrix);
3468    each processor's off-diagonal portion encompasses the remainder of the
3469    local matrix (a rectangular submatrix).
3470 
3471    The user can specify preallocated storage for the diagonal part of
3472    the local submatrix with either d_nz or d_nnz (not both).  Set
3473    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3474    memory allocation.  Likewise, specify preallocated storage for the
3475    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3476 
3477    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3478    the figure below we depict these three local rows and all columns (0-11).
3479 
3480 .vb
3481            0 1 2 3 4 5 6 7 8 9 10 11
3482           -------------------
3483    row 3  |  o o o d d d o o o o o o
3484    row 4  |  o o o d d d o o o o o o
3485    row 5  |  o o o d d d o o o o o o
3486           -------------------
3487 .ve
3488 
3489    Thus, any entries in the d locations are stored in the d (diagonal)
3490    submatrix, and any entries in the o locations are stored in the
3491    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3492    stored simply in the MATSEQBAIJ format for compressed row storage.
3493 
3494    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3495    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3496    In general, for PDE problems in which most nonzeros are near the diagonal,
3497    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3498    or you will get TERRIBLE performance; see the users' manual chapter on
3499    matrices.
3500 
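   Example (a minimal sketch; the block size and per-block-row counts are illustrative):
.vb
      ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
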
3501    Level: intermediate
3502 
3503 .keywords: matrix, block, aij, compressed row, sparse, parallel
3504 
3505 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3506 @*/
3507 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3508 {
3509   PetscErrorCode ierr;
3510   PetscMPIInt    size;
3511 
3512   PetscFunctionBegin;
3513   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3514   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3515   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3516   if (size > 1) {
3517     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3518     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3519   } else {
3520     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3521     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3522   }
3523   PetscFunctionReturn(0);
3524 }
3525 
3526 #undef __FUNCT__
3527 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3528 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3529 {
3530   Mat            mat;
3531   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3532   PetscErrorCode ierr;
3533   PetscInt       len=0;
3534 
3535   PetscFunctionBegin;
3536   *newmat       = 0;
3537   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3538   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3539   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3540   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3541 
3542   mat->factortype   = matin->factortype;
3543   mat->preallocated = PETSC_TRUE;
3544   mat->assembled    = PETSC_TRUE;
3545   mat->insertmode   = NOT_SET_VALUES;
3546 
3547   a      = (Mat_MPIBAIJ*)mat->data;
3548   mat->rmap->bs  = matin->rmap->bs;
3549   a->bs2   = oldmat->bs2;
3550   a->mbs   = oldmat->mbs;
3551   a->nbs   = oldmat->nbs;
3552   a->Mbs   = oldmat->Mbs;
3553   a->Nbs   = oldmat->Nbs;
3554 
3555   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3556   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3557 
3558   a->size         = oldmat->size;
3559   a->rank         = oldmat->rank;
3560   a->donotstash   = oldmat->donotstash;
3561   a->roworiented  = oldmat->roworiented;
3562   a->rowindices   = 0;
3563   a->rowvalues    = 0;
3564   a->getrowactive = PETSC_FALSE;
3565   a->barray       = 0;
3566   a->rstartbs     = oldmat->rstartbs;
3567   a->rendbs       = oldmat->rendbs;
3568   a->cstartbs     = oldmat->cstartbs;
3569   a->cendbs       = oldmat->cendbs;
3570 
3571   /* hash table stuff */
3572   a->ht           = 0;
3573   a->hd           = 0;
3574   a->ht_size      = 0;
3575   a->ht_flag      = oldmat->ht_flag;
3576   a->ht_fact      = oldmat->ht_fact;
3577   a->ht_total_ct  = 0;
3578   a->ht_insert_ct = 0;
3579 
3580   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3581   if (oldmat->colmap) {
3582 #if defined (PETSC_USE_CTABLE)
3583   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3584 #else
3585   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3586   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3587   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3588 #endif
3589   } else a->colmap = 0;
3590 
3591   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3592     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3593     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3594     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3595   } else a->garray = 0;
3596 
3597   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3598   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3599   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3600   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3601   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3602 
3603   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3604   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3605   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3606   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3607   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3608   *newmat = mat;
3609 
3610   PetscFunctionReturn(0);
3611 }
3612 
3613 #undef __FUNCT__
3614 #define __FUNCT__ "MatLoad_MPIBAIJ"
3615 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3616 {
3617   PetscErrorCode ierr;
3618   int            fd;
3619   PetscInt       i,nz,j,rstart,rend;
3620   PetscScalar    *vals,*buf;
3621   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3622   MPI_Status     status;
3623   PetscMPIInt    rank,size,maxnz;
3624   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3625   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3626   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3627   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3628   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3629   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3630 
3631   PetscFunctionBegin;
3632   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3633     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3634   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3635 
3636   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3637   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3638   if (!rank) {
3639     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3640     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3641     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3642   }
3643 
3644   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3645 
3646   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3647   M = header[1]; N = header[2];
3648 
3649   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3650   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3651   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3652 
3653   /* If global sizes are set, check if they are consistent with that given in the file */
3654   if (sizesset) {
3655     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3656   }
3657   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
3658   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
3659 
3660   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3661 
3662   /*
3663      This code adds extra rows to make sure the number of rows is
3664      divisible by the blocksize
3665   */
3666   Mbs        = M/bs;
3667   extra_rows = bs - M + bs*Mbs;
3668   if (extra_rows == bs) extra_rows = 0;
3669   else                  Mbs++;
3670   if (extra_rows && !rank) {
3671     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3672   }
3673 
3674   /* determine ownership of all rows */
3675   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3676     mbs        = Mbs/size + ((Mbs % size) > rank);
3677     m          = mbs*bs;
3678   } else { /* User set */
3679     m          = newmat->rmap->n;
3680     mbs        = m/bs;
3681   }
3682   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3683   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3684 
3685   /* process 0 needs enough room for process with most rows */
3686   if (!rank) {
3687     mmax = rowners[1];
3688     for (i=2; i<=size; i++) { /* rowners[1..size] hold every process's block row count at this point */
3689       mmax = PetscMax(mmax,rowners[i]);
3690     }
3691     mmax*=bs;
3692   } else mmax = m;
3693 
3694   rowners[0] = 0;
3695   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3696   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3697   rstart = rowners[rank];
3698   rend   = rowners[rank+1];
3699 
3700   /* distribute row lengths to all processors */
3701   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3702   if (!rank) {
3703     mend = m;
3704     if (size == 1) mend = mend - extra_rows;
3705     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3706     for (j=mend; j<m; j++) locrowlens[j] = 1;
3707     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3708     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3709     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3710     for (j=0; j<m; j++) {
3711       procsnz[0] += locrowlens[j];
3712     }
3713     for (i=1; i<size; i++) {
3714       mend = browners[i+1] - browners[i];
3715       if (i == size-1) mend = mend - extra_rows;
3716       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3717       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3718       /* calculate the number of nonzeros on each processor */
3719       for (j=0; j<browners[i+1]-browners[i]; j++) {
3720         procsnz[i] += rowlengths[j];
3721       }
3722       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3723     }
3724     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3725   } else {
3726     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3727   }
3728 
3729   if (!rank) {
3730     /* determine max buffer needed and allocate it */
3731     maxnz = procsnz[0];
3732     for (i=1; i<size; i++) {
3733       maxnz = PetscMax(maxnz,procsnz[i]);
3734     }
3735     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3736 
3737     /* read in my part of the matrix column indices  */
3738     nz     = procsnz[0];
3739     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3740     mycols = ibuf;
3741     if (size == 1)  nz -= extra_rows;
3742     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3743     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3744 
3745     /* read in everyone else's (except the last process's) and ship them off */
3746     for (i=1; i<size-1; i++) {
3747       nz   = procsnz[i];
3748       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3749       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3750     }
3751     /* read in the stuff for the last proc */
3752     if (size != 1) {
3753       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3754       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3755       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3756       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3757     }
3758     ierr = PetscFree(cols);CHKERRQ(ierr);
3759   } else {
3760     /* determine buffer space needed for message */
3761     nz = 0;
3762     for (i=0; i<m; i++) {
3763       nz += locrowlens[i];
3764     }
3765     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3766     mycols = ibuf;
3767     /* receive message of column indices*/
3768     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3769     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3770     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3771   }
3772 
3773   /* loop over local rows, determining number of off diagonal entries */
3774   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3775   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3776   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3777   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3778   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3779   rowcount = 0; nzcount = 0;
3780   for (i=0; i<mbs; i++) {
3781     dcount  = 0;
3782     odcount = 0;
3783     for (j=0; j<bs; j++) {
3784       kmax = locrowlens[rowcount];
3785       for (k=0; k<kmax; k++) {
3786         tmp = mycols[nzcount++]/bs;
3787         if (!mask[tmp]) {
3788           mask[tmp] = 1;
3789           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3790           else masked1[dcount++] = tmp;
3791         }
3792       }
3793       rowcount++;
3794     }
3795 
3796     dlens[i]  = dcount;
3797     odlens[i] = odcount;
3798 
3799     /* zero out the mask elements we set */
3800     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3801     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3802   }
3803 
3804 
3805   if (!sizesset) {
3806     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3807   }
3808   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3809 
3810   if (!rank) {
3811     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3812     /* read in my part of the matrix numerical values  */
3813     nz = procsnz[0];
3814     vals = buf;
3815     mycols = ibuf;
3816     if (size == 1)  nz -= extra_rows;
3817     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3818     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3819 
3820     /* insert into matrix */
3821     jj      = rstart*bs;
3822     for (i=0; i<m; i++) {
3823       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3824       mycols += locrowlens[i];
3825       vals   += locrowlens[i];
3826       jj++;
3827     }
3828     /* read in other processors (except the last one) and ship out */
3829     for (i=1; i<size-1; i++) {
3830       nz   = procsnz[i];
3831       vals = buf;
3832       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3833       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3834     }
3835     /* the last proc */
3836     if (size != 1){
3837       nz   = procsnz[i] - extra_rows;
3838       vals = buf;
3839       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3840       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3841       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3842     }
3843     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3844   } else {
3845     /* receive numeric values */
3846     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3847 
3848     /* receive message of values*/
3849     vals   = buf;
3850     mycols = ibuf;
3851     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3852     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3853     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3854 
3855     /* insert into matrix */
3856     jj      = rstart*bs;
3857     for (i=0; i<m; i++) {
3858       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3859       mycols += locrowlens[i];
3860       vals   += locrowlens[i];
3861       jj++;
3862     }
3863   }
3864   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3865   ierr = PetscFree(buf);CHKERRQ(ierr);
3866   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3867   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3868   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3869   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3870   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3871   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3872 
3873   PetscFunctionReturn(0);
3874 }
3875 
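/*
    A minimal usage sketch (hypothetical file name "matrix.dat", not part of
    this file): MatLoad_MPIBAIJ() is normally reached through MatLoad():

      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
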
3876 #undef __FUNCT__
3877 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3878 /*@
3879    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
3880
3881    Not Collective, each process can use a different factor
3882
3883    Input Parameters:
3884 +  mat  - the matrix
3885 -  fact - the factor used to size the hash table
3886 
3887    Level: advanced
3888 
3889   Notes:
3890    This can also be set by the command line option: -mat_use_hash_table <fact>
3891 
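   Example (a minimal sketch; 1.39 is the fallback value used elsewhere in this file):
.vb
      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve
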
3892 .keywords: matrix, hashtable, factor, HT
3893 
3894 .seealso: MatSetOption()
3895 @*/
3896 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3897 {
3898   PetscErrorCode ierr;
3899 
3900   PetscFunctionBegin;
3901   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3902   PetscFunctionReturn(0);
3903 }
3904 
3905 EXTERN_C_BEGIN
3906 #undef __FUNCT__
3907 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3908 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3909 {
3910   Mat_MPIBAIJ *baij;
3911 
3912   PetscFunctionBegin;
3913   baij = (Mat_MPIBAIJ*)mat->data;
3914   baij->ht_fact = fact;
3915   PetscFunctionReturn(0);
3916 }
3917 EXTERN_C_END
3918 
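/*
    MatMPIBAIJGetSeqBAIJ - returns the diagonal (Ad) and off-diagonal (Ao)
  SeqBAIJ pieces of an MPIBAIJ matrix, along with the map (colmap) from the
  compressed block columns of Ao to global block column numbers.
*/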
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3921 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3922 {
3923   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3924   PetscFunctionBegin;
3925   *Ad     = a->A;
3926   *Ao     = a->B;
3927   *colmap = a->garray;
3928   PetscFunctionReturn(0);
3929 }
3930 
3931 /*
3932     Special version for direct calls from Fortran (to eliminate two function call overheads)
3933 */
3934 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3935 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3936 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3937 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3938 #endif
3939 
3940 #undef __FUNCT__
3941 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3942 /*@C
3943   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3944 
3945   Collective on Mat
3946 
3947   Input Parameters:
3948 + mat - the matrix
3949 . min - number of input rows
3950 . im - input rows
3951 . nin - number of input columns
3952 . in - input columns
3953 . v - numerical values input
3954 - addvin - INSERT_VALUES or ADD_VALUES
3955 
3956   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3957 
3958   Level: advanced
3959 
3960 .seealso:   MatSetValuesBlocked()
3961 @*/
3962 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3963 {
3964   /* convert input arguments to C version */
3965   Mat             mat = *matin;
3966   PetscInt        m = *min, n = *nin;
3967   InsertMode      addv = *addvin;
3968 
3969   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3970   const MatScalar *value;
3971   MatScalar       *barray=baij->barray;
3972   PetscBool       roworiented = baij->roworiented;
3973   PetscErrorCode  ierr;
3974   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3975   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3976   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3977 
3978   PetscFunctionBegin;
3979   /* tasks normally handled by MatSetValuesBlocked() */
3980   if (mat->insertmode == NOT_SET_VALUES) {
3981     mat->insertmode = addv;
3982   }
3983 #if defined(PETSC_USE_DEBUG)
3984   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3985   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3986 #endif
3987   if (mat->assembled) {
3988     mat->was_assembled = PETSC_TRUE;
3989     mat->assembled     = PETSC_FALSE;
3990   }
3991   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3992 
3993 
3994   if (!barray) {
3995     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3996     baij->barray = barray;
3997   }
3998 
3999   if (roworiented) {
4000     stepval = (n-1)*bs;
4001   } else {
4002     stepval = (m-1)*bs;
4003   }
4004   for (i=0; i<m; i++) {
4005     if (im[i] < 0) continue;
4006 #if defined(PETSC_USE_DEBUG)
4007     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4008 #endif
4009     if (im[i] >= rstart && im[i] < rend) {
4010       row = im[i] - rstart;
4011       for (j=0; j<n; j++) {
4012         /* If NumCol = 1 then a copy is not required */
4013         if ((roworiented) && (n == 1)) {
4014           barray = (MatScalar*)v + i*bs2;
4015         } else if((!roworiented) && (m == 1)) {
4016           barray = (MatScalar*)v + j*bs2;
4017         } else { /* Here a copy is required */
4018           if (roworiented) {
4019             value = v + i*(stepval+bs)*bs + j*bs;
4020           } else {
4021             value = v + j*(stepval+bs)*bs + i*bs;
4022           }
4023           for (ii=0; ii<bs; ii++,value+=stepval) {
4024             for (jj=0; jj<bs; jj++) {
4025               *barray++  = *value++;
4026             }
4027           }
4028           barray -=bs2;
4029         }
4030 
        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

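            /* colmap stores, for each global block column present in B,
               1 + bs*(local block column), so a lookup that yields 0 marks a
               block column not yet present in the off-diagonal part */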
#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
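      /* this block row is owned by another process: buffer the values in the
         block stash (unless stashing was disabled); they are communicated to
         the owning process during MatAssemblyBegin()/MatAssemblyEnd() */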
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
         rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based; i[row] gives the offset into the local j (and a) arrays
     at which that local row starts, so i[0] must be 0.

.keywords: matrix, baij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
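
/*
   A minimal usage sketch for MatCreateMPIBAIJWithArrays() (a hypothetical
   standalone driver, not part of this file): each process passes its local
   rows in 0-based CSR form, where i[] holds the row offsets into j[] and a[],
   i[0] is 0, and bs is 1 (the only supported block size). The sizes chosen
   here (two local rows, two global columns) are illustrative assumptions.

       #include <petscmat.h>

       int main(int argc,char **argv)
       {
         Mat            A;
         PetscErrorCode ierr;
         PetscInt       i[] = {0,2,3};
         PetscInt       j[] = {0,1,1};
         PetscScalar    a[] = {2.0,-1.0,4.0};

         ierr = PetscInitialize(&argc,&argv,(char*)0,0);CHKERRQ(ierr);
         ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,PETSC_DECIDE,PETSC_DETERMINE,2,i,j,a,&A);CHKERRQ(ierr);
         ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
         ierr = MatDestroy(&A);CHKERRQ(ierr);
         ierr = PetscFinalize();
         return 0;
       }
*/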