#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structures. In that case the function
   MatSetValuesBlocked_MPIBAIJ() converts the double precision entries to single precision
   and then calls ..._MatScalar() to insert them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
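/*
   Note: in the common build (MatScalar == PetscScalar, i.e. PETSC_USE_MAT_SINGLE not
   defined) the #defines above make every ..._MatScalar name below an alias for the
   corresponding public function, so no conversion copy is ever made.
*/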

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global number of block columns.
*/
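/*
   A small worked example of the array (non-PETSC_USE_CTABLE) version, purely
   illustrative: with bs = 2 and garray = {3,7} (the global block columns that
   appear in B), the loop below sets colmap[3] = 0*bs+1 = 1 and colmap[7] =
   1*bs+1 = 3.  A later lookup for global point column 15 computes
   colmap[15/bs] - 1 + 15%bs = 3 - 1 + 1 = 3, the local point column in B;
   a colmap[] entry of 0 (giving col < 0) marks an absent block column.
*/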
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE  10

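/*
   The two macros below insert a single point entry (row,col) into the diagonal
   block A or the off-diagonal block B respectively.  They locate the target
   block column in the (sorted) block row by a short bisection followed by a
   linear scan, add or overwrite the entry inside its bs x bs block, and, if the
   block column is not yet present, shift the later blocks up (reallocating via
   MatSeqXAIJReallocateAIJ when the row is full) and zero the new block first.
*/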
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
      CHKMEMQ; \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
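/*
   Multiplicative hashing: HASH_KEY is the fractional part of the golden ratio,
   and HASH() scales the fractional part of key*HASH_KEY by the table size.
   A worked example (illustrative only): with size = 100 and key = 7,
   tmp = 7*0.6180339887 = 4.3262..., so HASH = (PetscInt)(100*0.3262...) = 32.
*/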
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry was found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstart;
  PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
      ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->N;
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
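/*
  The table is an open-addressing hash with linear probing: each slot holds a
  key (1 + row*Nbs + col, so 0 can mean "empty") and the address of the bs2
  values of that block in a->a or b->a.  A lookup hashes the key with HASH(),
  then scans forward (wrapping around at the end of the table) until the key
  or an empty slot is found.
*/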
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
  PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_DEBUG)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_DEBUG)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_DEBUG)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_DEBUG)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_DEBUG)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_DEBUG)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily set the roworiented flags to column-oriented, and restore the
       original flags after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

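/*
  y = A*x is computed in two pieces so that communication overlaps computation:
  the scatter Mvctx gathers the needed off-process entries of x into lvec while
  the diagonal block a->A is applied to the local part of x, after which the
  off-diagonal block a->B applied to lvec is added in.  The trailing
  VecScatterPostRecvs() call below appears intended to pre-post the receives
  for the next multiply.
*/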
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1376 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1377 {
1378   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1379   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1380   PetscErrorCode ierr;
1381   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1382   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1383   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1384 
1385   PetscFunctionBegin;
1386   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1387   mat->getrowactive = PETSC_TRUE;
1388 
1389   if (!mat->rowvalues && (idx || v)) {
1390     /*
1391         allocate enough space to hold information from the longest row.
1392     */
1393     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1394     PetscInt     max = 1,mbs = mat->mbs,tmp;
1395     for (i=0; i<mbs; i++) {
1396       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1397       if (max < tmp) { max = tmp; }
1398     }
1399     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1400     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1401   }
1402 
1403   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1404   lrow = row - brstart;
1405 
1406   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1407   if (!v)   {pvA = 0; pvB = 0;}
1408   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1409   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1410   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1411   nztot = nzA + nzB;
1412 
1413   cmap  = mat->garray;
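       /* B stores its columns in a compressed local numbering; garray maps a
          local block column of B back to its global block column.  Since A and
          B each return sorted columns, the global row is assembled as
          [B columns left of the diagonal block | A columns | remaining B
          columns], which keeps the merged result globally sorted */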
1414   if (v  || idx) {
1415     if (nztot) {
1416       /* Merge by increasing global column number, assuming A and B are each already sorted */
1417       PetscInt imark = -1;
1418       if (v) {
1419         *v = v_p = mat->rowvalues;
1420         for (i=0; i<nzB; i++) {
1421           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1422           else break;
1423         }
1424         imark = i;
1425         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1426         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1427       }
1428       if (idx) {
1429         *idx = idx_p = mat->rowindices;
1430         if (imark > -1) {
1431           for (i=0; i<imark; i++) {
1432             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1433           }
1434         } else {
1435           for (i=0; i<nzB; i++) {
1436             if (cmap[cworkB[i]/bs] < cstart)
1437               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1438             else break;
1439           }
1440           imark = i;
1441         }
1442         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1443         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1444       }
1445     } else {
1446       if (idx) *idx = 0;
1447       if (v)   *v   = 0;
1448     }
1449   }
1450   *nz = nztot;
1451   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1452   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1458 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1459 {
1460   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1461 
1462   PetscFunctionBegin;
1463   if (!baij->getrowactive) {
1464     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1465   }
1466   baij->getrowactive = PETSC_FALSE;
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1472 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1473 {
1474   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1475   PetscErrorCode ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1479   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1485 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1486 {
1487   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1488   Mat            A = a->A,B = a->B;
1489   PetscErrorCode ierr;
1490   PetscReal      isend[5],irecv[5];
1491 
1492   PetscFunctionBegin;
1493   info->block_size     = (PetscReal)matin->bs;
1494   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1495   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1496   isend[3] = info->memory;  isend[4] = info->mallocs;
1497   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1498   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1499   isend[3] += info->memory;  isend[4] += info->mallocs;
1500   if (flag == MAT_LOCAL) {
1501     info->nz_used      = isend[0];
1502     info->nz_allocated = isend[1];
1503     info->nz_unneeded  = isend[2];
1504     info->memory       = isend[3];
1505     info->mallocs      = isend[4];
1506   } else if (flag == MAT_GLOBAL_MAX) {
1507     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1508     info->nz_used      = irecv[0];
1509     info->nz_allocated = irecv[1];
1510     info->nz_unneeded  = irecv[2];
1511     info->memory       = irecv[3];
1512     info->mallocs      = irecv[4];
1513   } else if (flag == MAT_GLOBAL_SUM) {
1514     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1515     info->nz_used      = irecv[0];
1516     info->nz_allocated = irecv[1];
1517     info->nz_unneeded  = irecv[2];
1518     info->memory       = irecv[3];
1519     info->mallocs      = irecv[4];
1520   } else {
1521     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1522   }
1523   info->rows_global       = (PetscReal)matin->M;  /* sizes of the parallel matrix, not of the diagonal block */
1524   info->columns_global    = (PetscReal)matin->N;
1525   info->rows_local        = (PetscReal)matin->m;
1526   info->columns_local     = (PetscReal)matin->n;
1527   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1528   info->fill_ratio_needed = 0;
1529   info->factor_mallocs    = 0;
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1535 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1536 {
1537   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   switch (op) {
1542   case MAT_NO_NEW_NONZERO_LOCATIONS:
1543   case MAT_YES_NEW_NONZERO_LOCATIONS:
1544   case MAT_COLUMNS_UNSORTED:
1545   case MAT_COLUMNS_SORTED:
1546   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1547   case MAT_KEEP_ZEROED_ROWS:
1548   case MAT_NEW_NONZERO_LOCATION_ERR:
1549     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1550     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1551     break;
1552   case MAT_ROW_ORIENTED:
1553     a->roworiented = PETSC_TRUE;
1554     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1555     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1556     break;
1557   case MAT_ROWS_SORTED:
1558   case MAT_ROWS_UNSORTED:
1559   case MAT_YES_NEW_DIAGONALS:
1560     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1561     break;
1562   case MAT_COLUMN_ORIENTED:
1563     a->roworiented = PETSC_FALSE;
1564     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1565     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1566     break;
1567   case MAT_IGNORE_OFF_PROC_ENTRIES:
1568     a->donotstash = PETSC_TRUE;
1569     break;
1570   case MAT_NO_NEW_DIAGONALS:
1571     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1572   case MAT_USE_HASH_TABLE:
1573     a->ht_flag = PETSC_TRUE;
1574     break;
1575   case MAT_SYMMETRIC:
1576   case MAT_STRUCTURALLY_SYMMETRIC:
1577   case MAT_HERMITIAN:
1578   case MAT_SYMMETRY_ETERNAL:
1579     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1580     break;
1581   case MAT_NOT_SYMMETRIC:
1582   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1583   case MAT_NOT_HERMITIAN:
1584   case MAT_NOT_SYMMETRY_ETERNAL:
1585     break;
1586   default:
1587     SETERRQ(PETSC_ERR_SUP,"unknown option");
1588   }
1589   PetscFunctionReturn(0);
1590 }
1591 
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1594 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1595 {
1596   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1597   Mat_SeqBAIJ    *Aloc;
1598   Mat            B;
1599   PetscErrorCode ierr;
1600   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1601   PetscInt       bs=A->bs,mbs=baij->mbs;
1602   MatScalar      *a;
1603 
1604   PetscFunctionBegin;
1605   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1606   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1607   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1608   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1609 
1610   /* copy over the A part */
1611   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1612   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1613   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
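       /* rvals holds the bs consecutive global row indices of the current block
          row; each bs-long segment of the block values a is inserted as one row
          of the transpose with those bs column indices */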
1614 
1615   for (i=0; i<mbs; i++) {
1616     rvals[0] = bs*(baij->rstart + i);
1617     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1618     for (j=ai[i]; j<ai[i+1]; j++) {
1619       col = (baij->cstart+aj[j])*bs;
1620       for (k=0; k<bs; k++) {
1621         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1622         col++; a += bs;
1623       }
1624     }
1625   }
1626   /* copy over the B part */
1627   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1628   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1629   for (i=0; i<mbs; i++) {
1630     rvals[0] = bs*(baij->rstart + i);
1631     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       col = baij->garray[aj[j]]*bs;
1634       for (k=0; k<bs; k++) {
1635         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1636         col++; a += bs;
1637       }
1638     }
1639   }
1640   ierr = PetscFree(rvals);CHKERRQ(ierr);
1641   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643 
1644   if (matout) {
1645     *matout = B;
1646   } else {
1647     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1648   }
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1654 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1655 {
1656   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1657   Mat            a = baij->A,b = baij->B;
1658   PetscErrorCode ierr;
1659   PetscInt       s1,s2,s3;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1663   if (rr) {
1664     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1665     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1666     /* Overlap communication with computation. */
1667     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1668   }
1669   if (ll) {
1670     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1671     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1672     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1673   }
1674   /* scale  the diagonal block */
1675   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1676 
1677   if (rr) {
1678     /* Do a scatter end and then right scale the off-diagonal block */
1679     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1680     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1681   }
1682 
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1688 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1689 {
1690   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1691   PetscErrorCode ierr;
1692   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1693   PetscInt       i,N,*rows,*owners = l->rowners;
1694   PetscInt       *nprocs,j,idx,nsends,row;
1695   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1696   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1697   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1698   MPI_Comm       comm = A->comm;
1699   MPI_Request    *send_waits,*recv_waits;
1700   MPI_Status     recv_status,*send_status;
1701   IS             istmp;
1702 #if defined(PETSC_DEBUG)
1703   PetscTruth     found = PETSC_FALSE;
1704 #endif
1705 
1706   PetscFunctionBegin;
1707   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1708   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1709 
1710   /*  first count number of contributors to each processor */
1711   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1712   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1713   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
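       /* nprocs[2*j] counts the rows destined for process j and nprocs[2*j+1]
          is a 0/1 flag marking that a message to j is needed; j is reset only
          when the indices arrive out of order, so a sorted index set is
          scanned in a single linear pass */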
1714   j = 0;
1715   for (i=0; i<N; i++) {
1716     if (lastidx > (idx = rows[i])) j = 0;
1717     lastidx = idx;
1718     for (; j<size; j++) {
1719       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1720         nprocs[2*j]++;
1721         nprocs[2*j+1] = 1;
1722         owner[i] = j;
1723 #if defined(PETSC_DEBUG)
1724         found = PETSC_TRUE;
1725 #endif
1726         break;
1727       }
1728     }
1729 #if defined(PETSC_DEBUG)
1730     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1731     found = PETSC_FALSE;
1732 #endif
1733   }
1734   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1735 
1736   /* inform other processors of number of messages and max length*/
1737   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1738 
1739   /* post receives:   */
1740   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1741   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1742   for (i=0; i<nrecvs; i++) {
1743     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1744   }
1745 
1746   /* do sends:
1747      starts[i] gives the starting index in svalues for the rows going to
1748      the ith processor
1749   */
1750   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1751   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1752   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1753   starts[0]  = 0;
1754   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1755   for (i=0; i<N; i++) {
1756     svalues[starts[owner[i]]++] = rows[i];
1757   }
1758   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1759 
1760   starts[0] = 0;
1761   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1762   count = 0;
1763   for (i=0; i<size; i++) {
1764     if (nprocs[2*i+1]) {
1765       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1766     }
1767   }
1768   ierr = PetscFree(starts);CHKERRQ(ierr);
1769 
1770   base = owners[rank]*bs;
1771 
1772   /*  wait on receives */
1773   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1774   source = lens + nrecvs;
1775   count  = nrecvs; slen = 0;
1776   while (count) {
1777     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1778     /* unpack receives into our local space */
1779     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1780     source[imdex]  = recv_status.MPI_SOURCE;
1781     lens[imdex]    = n;
1782     slen          += n;
1783     count--;
1784   }
1785   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1786 
1787   /* move the data into the send scatter */
1788   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1789   count = 0;
1790   for (i=0; i<nrecvs; i++) {
1791     values = rvalues + i*nmax;
1792     for (j=0; j<lens[i]; j++) {
1793       lrows[count++] = values[j] - base;
1794     }
1795   }
1796   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1797   ierr = PetscFree(lens);CHKERRQ(ierr);
1798   ierr = PetscFree(owner);CHKERRQ(ierr);
1799   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1800 
1801   /* actually zap the local rows */
1802   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1803   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1804 
1805   /*
1806         Zero the required rows. If the "diagonal block" of the matrix
1807      is square and the user wishes to set the diagonal we use separate
1808      code so that MatSetValues() is not called for each diagonal entry,
1809      which would allocate new memory, causing many mallocs and slowing things down.
1810 
1811        Contributed by: Matthew Knepley
1812   */
1813   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1814   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1815   if (diag && (l->A->M == l->A->N)) {
1816     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1817   } else if (diag) {
1818     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1819     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1820       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1821 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1822     }
1823     for (i=0; i<slen; i++) {
1824       row  = lrows[i] + rstart_bs;
1825       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1826     }
1827     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1828     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1829   } else {
1830     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1831   }
1832 
1833   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1834   ierr = PetscFree(lrows);CHKERRQ(ierr);
1835 
1836   /* wait on sends */
1837   if (nsends) {
1838     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1839     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1840     ierr = PetscFree(send_status);CHKERRQ(ierr);
1841   }
1842   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1843   ierr = PetscFree(svalues);CHKERRQ(ierr);
1844 
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1850 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1851 {
1852   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1853   MPI_Comm          comm = A->comm;
1854   static PetscTruth called = PETSC_FALSE;
1855   PetscErrorCode    ierr;
1856 
1857   PetscFunctionBegin;
1858   if (!a->rank) {
1859     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1860   }
1861   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1862   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1863   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1869 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1870 {
1871   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1872   PetscErrorCode ierr;
1873 
1874   PetscFunctionBegin;
1875   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatEqual_MPIBAIJ"
1883 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1884 {
1885   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1886   Mat            a,b,c,d;
1887   PetscTruth     flg;
1888   PetscErrorCode ierr;
1889 
1890   PetscFunctionBegin;
1891   a = matA->A; b = matA->B;
1892   c = matB->A; d = matB->B;
1893 
1894   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1895   if (flg) {
1896     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1897   }
1898   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1905 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1906 {
1907   PetscErrorCode ierr;
1908 
1909   PetscFunctionBegin;
1910   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 /* -------------------------------------------------------------------*/
1915 static struct _MatOps MatOps_Values = {
1916        MatSetValues_MPIBAIJ,
1917        MatGetRow_MPIBAIJ,
1918        MatRestoreRow_MPIBAIJ,
1919        MatMult_MPIBAIJ,
1920 /* 4*/ MatMultAdd_MPIBAIJ,
1921        MatMultTranspose_MPIBAIJ,
1922        MatMultTransposeAdd_MPIBAIJ,
1923        0,
1924        0,
1925        0,
1926 /*10*/ 0,
1927        0,
1928        0,
1929        0,
1930        MatTranspose_MPIBAIJ,
1931 /*15*/ MatGetInfo_MPIBAIJ,
1932        MatEqual_MPIBAIJ,
1933        MatGetDiagonal_MPIBAIJ,
1934        MatDiagonalScale_MPIBAIJ,
1935        MatNorm_MPIBAIJ,
1936 /*20*/ MatAssemblyBegin_MPIBAIJ,
1937        MatAssemblyEnd_MPIBAIJ,
1938        0,
1939        MatSetOption_MPIBAIJ,
1940        MatZeroEntries_MPIBAIJ,
1941 /*25*/ MatZeroRows_MPIBAIJ,
1942        0,
1943        0,
1944        0,
1945        0,
1946 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1947        0,
1948        0,
1949        0,
1950        0,
1951 /*35*/ MatDuplicate_MPIBAIJ,
1952        0,
1953        0,
1954        0,
1955        0,
1956 /*40*/ 0,
1957        MatGetSubMatrices_MPIBAIJ,
1958        MatIncreaseOverlap_MPIBAIJ,
1959        MatGetValues_MPIBAIJ,
1960        0,
1961 /*45*/ MatPrintHelp_MPIBAIJ,
1962        MatScale_MPIBAIJ,
1963        0,
1964        0,
1965        0,
1966 /*50*/ 0,
1967        0,
1968        0,
1969        0,
1970        0,
1971 /*55*/ 0,
1972        0,
1973        MatSetUnfactored_MPIBAIJ,
1974        0,
1975        MatSetValuesBlocked_MPIBAIJ,
1976 /*60*/ 0,
1977        MatDestroy_MPIBAIJ,
1978        MatView_MPIBAIJ,
1979        MatGetPetscMaps_Petsc,
1980        0,
1981 /*65*/ 0,
1982        0,
1983        0,
1984        0,
1985        0,
1986 /*70*/ MatGetRowMax_MPIBAIJ,
1987        0,
1988        0,
1989        0,
1990        0,
1991 /*75*/ 0,
1992        0,
1993        0,
1994        0,
1995        0,
1996 /*80*/ 0,
1997        0,
1998        0,
1999        0,
2000        MatLoad_MPIBAIJ,
2001 /*85*/ 0,
2002        0,
2003        0,
2004        0,
2005        0,
2006 /*90*/ 0,
2007        0,
2008        0,
2009        0,
2010        0,
2011 /*95*/ 0,
2012        0,
2013        0,
2014        0};
2015 
2016 
2017 EXTERN_C_BEGIN
2018 #undef __FUNCT__
2019 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2020 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2021 {
2022   PetscFunctionBegin;
2023   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2024   *iscopy = PETSC_FALSE;
2025   PetscFunctionReturn(0);
2026 }
2027 EXTERN_C_END
2028 
2029 EXTERN_C_BEGIN
2030 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2031 EXTERN_C_END
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2035 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2036 {
2037   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2038   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2039   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2040   const PetscInt *JJ;
2041   PetscScalar    *values;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045 #if defined(PETSC_OPT_g)
2046   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2047 #endif
2048   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2049   o_nnz = d_nnz + m;
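       /* for each local block row count the block columns that fall inside the
          diagonal block [cstart,cend) versus outside it; this two-pass scan
          relies on the column indices of each row being sorted */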
2050 
2051   for (i=0; i<m; i++) {
2052     nnz     = I[i+1]- I[i];
2053     JJ      = J + I[i];
2054     nnz_max = PetscMax(nnz_max,nnz);
2055 #if defined(PETSC_OPT_g)
2056     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2057 #endif
2058     for (j=0; j<nnz; j++) {
2059       if (*JJ >= cstart) break;
2060       JJ++;
2061     }
2062     d = 0;
2063     for (; j<nnz; j++) {
2064       if (*JJ++ >= cend) break;
2065       d++;
2066     }
2067     d_nnz[i] = d;
2068     o_nnz[i] = nnz - d;
2069   }
2070   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2071   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2072 
2073   if (v) values = (PetscScalar*)v;
2074   else {
2075     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2076     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2077   }
2078 
2079   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2080   for (i=0; i<m; i++) {
2081     ii   = i + rstart;
2082     nnz  = I[i+1]- I[i];
2083     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values+(v ? bs*bs*I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); /* advance into v by whole blocks when values were supplied */
2084   }
2085   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2086   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2087   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2088 
2089   if (!v) {
2090     ierr = PetscFree(values);CHKERRQ(ierr);
2091   }
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2097 /*@C
2098    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format (block compressed row)
2099 
2100    Collective on MPI_Comm
2101 
2102    Input Parameters:
2103 +  B - the matrix
2104 .  bs - the block size
2105 .  i - the indices into j for the start of each local block row (starts with zero)
2106 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2107 -  v - optional values in the matrix
2108 
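        Example:
        As an illustrative sketch, a process that owns a single block row with
        bs = 2 and nonzero blocks in global block columns 0 and 5 could pass

     .vb
           PetscInt    i[2] = {0,2};                /* block row 0 holds two blocks */
           PetscInt    j[2] = {0,5};                /* sorted global block columns  */
           PetscScalar v[8] = {1,2,3,4,5,6,7,8};    /* two 2x2 blocks of values     */
           ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
     .ve
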
2109    Level: developer
2110 
2111 .keywords: matrix, block, baij, compressed row, sparse, parallel
2112 
2113 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2114 @*/
2115 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2116 {
2117   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2118 
2119   PetscFunctionBegin;
2120   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2121   if (f) {
2122     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2123   }
2124   PetscFunctionReturn(0);
2125 }
2126 
2127 EXTERN_C_BEGIN
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2130 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2131 {
2132   Mat_MPIBAIJ    *b;
2133   PetscErrorCode ierr;
2134   PetscInt       i;
2135 
2136   PetscFunctionBegin;
2137   B->preallocated = PETSC_TRUE;
2138   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2139 
2140   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2141   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2142   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2143   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2144   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2145   if (d_nnz) {
2146     for (i=0; i<B->m/bs; i++) {
2147       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2148     }
2149   }
2150   if (o_nnz) {
2151     for (i=0; i<B->m/bs; i++) {
2152       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2153     }
2154   }
2155 
2156   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2157   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2158   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2159   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2160 
2161   b = (Mat_MPIBAIJ*)B->data;
2162   B->bs  = bs;
2163   b->bs2 = bs*bs;
2164   b->mbs = B->m/bs;
2165   b->nbs = B->n/bs;
2166   b->Mbs = B->M/bs;
2167   b->Nbs = B->N/bs;
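       /* gather every process's block row/column counts and convert them to
          ownership ranges by a running sum; the *_bs variants are the same
          ranges scaled from block indices to point (scalar) indices */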
2168 
2169   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2170   b->rowners[0]    = 0;
2171   for (i=2; i<=b->size; i++) {
2172     b->rowners[i] += b->rowners[i-1];
2173   }
2174   b->rstart    = b->rowners[b->rank];
2175   b->rend      = b->rowners[b->rank+1];
2176 
2177   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2178   b->cowners[0] = 0;
2179   for (i=2; i<=b->size; i++) {
2180     b->cowners[i] += b->cowners[i-1];
2181   }
2182   b->cstart    = b->cowners[b->rank];
2183   b->cend      = b->cowners[b->rank+1];
2184 
2185   for (i=0; i<=b->size; i++) {
2186     b->rowners_bs[i] = b->rowners[i]*bs;
2187   }
2188   b->rstart_bs = b->rstart*bs;
2189   b->rend_bs   = b->rend*bs;
2190   b->cstart_bs = b->cstart*bs;
2191   b->cend_bs   = b->cend*bs;
2192 
2193   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2194   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2195   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2196   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2197   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2198   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2199   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2200   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2201 
2202   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2203 
2204   PetscFunctionReturn(0);
2205 }
2206 EXTERN_C_END
2207 
2208 EXTERN_C_BEGIN
2209 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2210 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2211 EXTERN_C_END
2212 
2213 /*MC
2214    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2215 
2216    Options Database Keys:
2217 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2218 
2219   Level: beginner
2220 
2221 .seealso: MatCreateMPIBAIJ
2222 M*/
2223 
2224 EXTERN_C_BEGIN
2225 #undef __FUNCT__
2226 #define __FUNCT__ "MatCreate_MPIBAIJ"
2227 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2228 {
2229   Mat_MPIBAIJ    *b;
2230   PetscErrorCode ierr;
2231   PetscTruth     flg;
2232 
2233   PetscFunctionBegin;
2234   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2235   B->data = (void*)b;
2236 
2237 
2238   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2239   B->mapping    = 0;
2240   B->factor     = 0;
2241   B->assembled  = PETSC_FALSE;
2242 
2243   B->insertmode = NOT_SET_VALUES;
2244   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2245   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2246 
2247   /* build local table of row and column ownerships */
2248   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2249   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2250   b->cowners    = b->rowners + b->size + 2;
2251   b->rowners_bs = b->cowners + b->size + 2;
2252 
2253   /* build cache for off array entries formed */
2254   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2255   b->donotstash  = PETSC_FALSE;
2256   b->colmap      = PETSC_NULL;
2257   b->garray      = PETSC_NULL;
2258   b->roworiented = PETSC_TRUE;
2259 
2260 #if defined(PETSC_USE_MAT_SINGLE)
2261   /* stuff for MatSetValues_XXX in single precision */
2262   b->setvalueslen     = 0;
2263   b->setvaluescopy    = PETSC_NULL;
2264 #endif
2265 
2266   /* stuff used in block assembly */
2267   b->barray       = 0;
2268 
2269   /* stuff used for matrix vector multiply */
2270   b->lvec         = 0;
2271   b->Mvctx        = 0;
2272 
2273   /* stuff for MatGetRow() */
2274   b->rowindices   = 0;
2275   b->rowvalues    = 0;
2276   b->getrowactive = PETSC_FALSE;
2277 
2278   /* hash table stuff */
2279   b->ht           = 0;
2280   b->hd           = 0;
2281   b->ht_size      = 0;
2282   b->ht_flag      = PETSC_FALSE;
2283   b->ht_fact      = 0;
2284   b->ht_total_ct  = 0;
2285   b->ht_insert_ct = 0;
2286 
2287   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2288   if (flg) {
2289     PetscReal fact = 1.39;
2290     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2291     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2292     if (fact <= 1.0) fact = 1.39;
2293     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2294     ierr = PetscLogInfo((0,"MatCreate_MPIBAIJ:Hash table factor used %5.2f\n",fact));CHKERRQ(ierr);
2295   }
2296   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2297                                      "MatStoreValues_MPIBAIJ",
2298                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2299   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2300                                      "MatRetrieveValues_MPIBAIJ",
2301                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2302   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2303                                      "MatGetDiagonalBlock_MPIBAIJ",
2304                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2305   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2306                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2307                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2308   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2309                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2310                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2311   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2312                                      "MatDiagonalScaleLocal_MPIBAIJ",
2313                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2314   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2315                                      "MatSetHashTableFactor_MPIBAIJ",
2316                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 EXTERN_C_END
2320 
2321 /*MC
2322    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2323 
2324    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2325    and MATMPIBAIJ otherwise.
2326 
2327    Options Database Keys:
2328 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2329 
2330   Level: beginner
2331 
2332 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2333 M*/
2334 
2335 EXTERN_C_BEGIN
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatCreate_BAIJ"
2338 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2339 {
2340   PetscErrorCode ierr;
2341   PetscMPIInt    size;
2342 
2343   PetscFunctionBegin;
2344   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2345   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2346   if (size == 1) {
2347     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2348   } else {
2349     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2350   }
2351   PetscFunctionReturn(0);
2352 }
2353 EXTERN_C_END
2354 
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2357 /*@C
2358    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2359    (block compressed row).  For good matrix assembly performance
2360    the user should preallocate the matrix storage by setting the parameters
2361    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2362    performance can be increased by more than a factor of 50.
2363 
2364    Collective on Mat
2365 
2366    Input Parameters:
2367 +  A - the matrix
2368 .  bs   - size of block
2369 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2370            submatrix  (same for all local rows)
2371 .  d_nnz - array containing the number of block nonzeros in the various block rows
2372            in the diagonal portion of the local submatrix (possibly different for each block
2373            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2374 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2375            submatrix (same for all local rows).
2376 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2377            off-diagonal portion of the local submatrix (possibly different for
2378            each block row) or PETSC_NULL.
2379 
2380    If the *_nnz parameter is given then the *_nz parameter is ignored
2381 
2382    Options Database Keys:
2383 +   -mat_no_unroll - uses code that does not unroll the loops in the
2384                      block calculations (much slower)
2385 -   -mat_block_size - size of the blocks to use
2386 
2387    Notes:
2388    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2389    then it must be used on all processors that share the object for that argument.
2390 
2391    Storage Information:
2392    For a square global matrix we define each processor's diagonal portion
2393    to be its local rows and the corresponding columns (a square submatrix);
2394    each processor's off-diagonal portion encompasses the remainder of the
2395    local matrix (a rectangular submatrix).
2396 
2397    The user can specify preallocated storage for the diagonal part of
2398    the local submatrix with either d_nz or d_nnz (not both).  Set
2399    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2400    memory allocation.  Likewise, specify preallocated storage for the
2401    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2402 
2403    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2404    the figure below we depict these three local rows and all columns (0-11).
2405 
2406 .vb
2407            0 1 2 3 4 5 6 7 8 9 10 11
2408           -------------------
2409    row 3  |  o o o d d d o o o o o o
2410    row 4  |  o o o d d d o o o o o o
2411    row 5  |  o o o d d d o o o o o o
2412           -------------------
2413 .ve
2414 
2415    Thus, any entries in the d locations are stored in the d (diagonal)
2416    submatrix, and any entries in the o locations are stored in the
2417    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2418    stored simply in the MATSEQBAIJ format for compressed row storage.
2419 
2420    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2421    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2422    In general, for PDE problems in which most nonzeros are near the diagonal,
2423    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2424    or you will get TERRIBLE performance; see the users' manual chapter on
2425    matrices.
2426 
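        Example:
        As a small sketch, assuming this process owns three block rows whose exact
        per-row block counts are known, one could preallocate with

     .vb
           PetscInt d_nnz[3] = {3,2,3},o_nnz[3] = {1,0,2};
           ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     .ve
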
2427    Level: intermediate
2428 
2429 .keywords: matrix, block, aij, compressed row, sparse, parallel
2430 
2431 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2432 @*/
2433 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2434 {
2435   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2436 
2437   PetscFunctionBegin;
2438   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2439   if (f) {
2440     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2441   }
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatCreateMPIBAIJ"
2447 /*@C
2448    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2449    (block compressed row).  For good matrix assembly performance
2450    the user should preallocate the matrix storage by setting the parameters
2451    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2452    performance can be increased by more than a factor of 50.
2453 
2454    Collective on MPI_Comm
2455 
2456    Input Parameters:
2457 +  comm - MPI communicator
2458 .  bs   - size of block
2459 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2460            This value should be the same as the local size used in creating the
2461            y vector for the matrix-vector product y = Ax.
2462 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2463            This value should be the same as the local size used in creating the
2464            x vector for the matrix-vector product y = Ax.
2465 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2466 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2467 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2468            submatrix  (same for all local rows)
2469 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2470            in the diagonal portion of the local submatrix (possibly different for each block
2471            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2472 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2473            submatrix (same for all local rows).
2474 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2475            off-diagonal portion of the local submatrix (possibly different for
2476            each block row) or PETSC_NULL.
2477 
2478    Output Parameter:
2479 .  A - the matrix
2480 
2481    Options Database Keys:
2482 +   -mat_no_unroll - uses code that does not unroll the loops in the
2483                      block calculations (much slower)
2484 -   -mat_block_size - size of the blocks to use
2485 
2486    Notes:
2487    If the *_nnz parameter is given then the *_nz parameter is ignored
2488 
2489    A nonzero block is any block that has 1 or more nonzeros in it
2490 
2491    The user MUST specify either the local or global matrix dimensions
2492    (possibly both).
2493 
2494    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2495    then it must be used on all processors that share the object for that argument.
2496 
2497    Storage Information:
2498    For a square global matrix we define each processor's diagonal portion
2499    to be its local rows and the corresponding columns (a square submatrix);
2500    each processor's off-diagonal portion encompasses the remainder of the
2501    local matrix (a rectangular submatrix).
2502 
2503    The user can specify preallocated storage for the diagonal part of
2504    the local submatrix with either d_nz or d_nnz (not both).  Set
2505    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2506    memory allocation.  Likewise, specify preallocated storage for the
2507    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2508 
2509    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2510    the figure below we depict these three local rows and all columns (0-11).
2511 
2512 .vb
2513            0 1 2 3 4 5 6 7 8 9 10 11
2514           -------------------
2515    row 3  |  o o o d d d o o o o o o
2516    row 4  |  o o o d d d o o o o o o
2517    row 5  |  o o o d d d o o o o o o
2518           -------------------
2519 .ve
2520 
2521    Thus, any entries in the d locations are stored in the d (diagonal)
2522    submatrix, and any entries in the o locations are stored in the
2523    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2524    stored simply in the MATSEQBAIJ format for compressed row storage.
2525 
2526    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2527    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2528    In general, for PDE problems in which most nonzeros are near the diagonal,
2529    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2530    or you will get TERRIBLE performance; see the users' manual chapter on
2531    matrices.
2532 
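        Example:
        As an illustrative sketch, a 100 by 100 matrix with block size 5, letting
        PETSc choose the local sizes and using constant per-block-row estimates,
        could be created with

     .vb
           ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,5,PETSC_DECIDE,PETSC_DECIDE,100,100,4,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     .ve
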
2533    Level: intermediate
2534 
2535 .keywords: matrix, block, aij, compressed row, sparse, parallel
2536 
2537 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2538 @*/
2539 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2540 {
2541   PetscErrorCode ierr;
2542   PetscMPIInt    size;
2543 
2544   PetscFunctionBegin;
2545   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2546   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2547   if (size > 1) {
2548     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2549     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2550   } else {
2551     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2552     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2553   }
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2559 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2560 {
2561   Mat            mat;
2562   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2563   PetscErrorCode ierr;
2564   PetscInt       len=0;
2565 
2566   PetscFunctionBegin;
2567   *newmat       = 0;
2568   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2569   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2570   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2571 
2572   mat->factor       = matin->factor;
2573   mat->preallocated = PETSC_TRUE;
2574   mat->assembled    = PETSC_TRUE;
2575   mat->insertmode   = NOT_SET_VALUES;
2576 
2577   a      = (Mat_MPIBAIJ*)mat->data;
2578   mat->bs  = matin->bs;
2579   a->bs2   = oldmat->bs2;
2580   a->mbs   = oldmat->mbs;
2581   a->nbs   = oldmat->nbs;
2582   a->Mbs   = oldmat->Mbs;
2583   a->Nbs   = oldmat->Nbs;
2584 
2585   a->rstart       = oldmat->rstart;
2586   a->rend         = oldmat->rend;
2587   a->cstart       = oldmat->cstart;
2588   a->cend         = oldmat->cend;
2589   a->size         = oldmat->size;
2590   a->rank         = oldmat->rank;
2591   a->donotstash   = oldmat->donotstash;
2592   a->roworiented  = oldmat->roworiented;
2593   a->rowindices   = 0;
2594   a->rowvalues    = 0;
2595   a->getrowactive = PETSC_FALSE;
2596   a->barray       = 0;
2597   a->rstart_bs    = oldmat->rstart_bs;
2598   a->rend_bs      = oldmat->rend_bs;
2599   a->cstart_bs    = oldmat->cstart_bs;
2600   a->cend_bs      = oldmat->cend_bs;
2601 
2602   /* hash table stuff */
2603   a->ht           = 0;
2604   a->hd           = 0;
2605   a->ht_size      = 0;
2606   a->ht_flag      = oldmat->ht_flag;
2607   a->ht_fact      = oldmat->ht_fact;
2608   a->ht_total_ct  = 0;
2609   a->ht_insert_ct = 0;
2610 
2611   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2612   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2613   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2614   if (oldmat->colmap) {
2615 #if defined (PETSC_USE_CTABLE)
2616   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2617 #else
2618   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2619   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2620   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2621 #endif
2622   } else a->colmap = 0;
2623 
2624   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2625     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2626     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2627     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2628   } else a->garray = 0;
2629 
2630   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2631   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2632   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2633   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2634 
2635   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2636   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2637   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2638   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2639   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2640   *newmat = mat;
2641 
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 #include "petscsys.h"
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatLoad_MPIBAIJ"
2649 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2650 {
2651   Mat            A;
2652   PetscErrorCode ierr;
2653   int            fd;
2654   PetscInt       i,nz,j,rstart,rend;
2655   PetscScalar    *vals,*buf;
2656   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2657   MPI_Status     status;
2658   PetscMPIInt    rank,size,maxnz;
2659   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2660   PetscInt       *locrowlens,*procsnz = 0,*browners;
2661   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2662   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2663   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2664   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2665 
2666   PetscFunctionBegin;
2667   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2668 
2669   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2670   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2671   if (!rank) {
2672     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2673     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2674     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2675   }
2676 
2677   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2678   M = header[1]; N = header[2];
2679 
2680   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2681 
2682   /*
2683      This code adds extra rows to make sure the number of rows is
2684      divisible by the blocksize
2685   */
2686   Mbs        = M/bs;
2687   extra_rows = bs - M + bs*Mbs;
2688   if (extra_rows == bs) extra_rows = 0;
2689   else                  Mbs++;
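       /* for example, M = 10 with bs = 4 gives Mbs = 2 and extra_rows = 4 - 10 + 8 = 2,
          so Mbs becomes 3 and the matrix is padded to 12 = 3*4 rows */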
2690   if (extra_rows && !rank) {
2691     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2692   }
2693 
2694   /* determine ownership of all rows */
2695   mbs        = Mbs/size + ((Mbs % size) > rank);
2696   m          = mbs*bs;
2697   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2698   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2699 
2700   /* process 0 needs enough room for process with most rows */
2701   if (!rank) {
2702     mmax = rowners[1];
2703     for (i=2; i<size; i++) {
2704       mmax = PetscMax(mmax,rowners[i]);
2705     }
2706     mmax*=bs;
2707   } else mmax = m;
2708 
2709   rowners[0] = 0;
2710   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2711   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2712   rstart = rowners[rank];
2713   rend   = rowners[rank+1];
2714 
2715   /* distribute row lengths to all processors */
2716   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2717   if (!rank) {
2718     mend = m;
2719     if (size == 1) mend = mend - extra_rows;
2720     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2721     for (j=mend; j<m; j++) locrowlens[j] = 1;
2722     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2723     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2724     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2725     for (j=0; j<m; j++) {
2726       procsnz[0] += locrowlens[j];
2727     }
2728     for (i=1; i<size; i++) {
2729       mend = browners[i+1] - browners[i];
2730       if (i == size-1) mend = mend - extra_rows;
2731       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2732       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2733       /* calculate the number of nonzeros on each processor */
2734       for (j=0; j<browners[i+1]-browners[i]; j++) {
2735         procsnz[i] += rowlengths[j];
2736       }
2737       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2738     }
2739     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2740   } else {
2741     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2742   }
2743 
2744   if (!rank) {
2745     /* determine max buffer needed and allocate it */
2746     maxnz = procsnz[0];
2747     for (i=1; i<size; i++) {
2748       maxnz = PetscMax(maxnz,procsnz[i]);
2749     }
2750     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2751 
2752     /* read in my part of the matrix column indices  */
2753     nz     = procsnz[0];
2754     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2755     mycols = ibuf;
2756     if (size == 1)  nz -= extra_rows;
2757     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2758     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2759 
2760     /* read in every other process's part (except the last) and ship it off */
2761     for (i=1; i<size-1; i++) {
2762       nz   = procsnz[i];
2763       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2764       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2765     }
2766     /* read in the stuff for the last proc */
2767     if (size != 1) {
2768       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2769       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2770       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2771       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2772     }
2773     ierr = PetscFree(cols);CHKERRQ(ierr);
2774   } else {
2775     /* determine buffer space needed for message */
2776     nz = 0;
2777     for (i=0; i<m; i++) {
2778       nz += locrowlens[i];
2779     }
2780     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2781     mycols = ibuf;
2782     /* receive message of column indices*/
2783     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2784     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2785     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2786   }
2787 
2788   /* loop over local rows, determining number of off diagonal entries */
2789   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2790   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2791   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2792   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2793   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2794   rowcount = 0; nzcount = 0;
2795   for (i=0; i<mbs; i++) {
2796     dcount  = 0;
2797     odcount = 0;
2798     for (j=0; j<bs; j++) {
2799       kmax = locrowlens[rowcount];
2800       for (k=0; k<kmax; k++) {
2801         tmp = mycols[nzcount++]/bs;
2802         if (!mask[tmp]) {
2803           mask[tmp] = 1;
2804           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2805           else masked1[dcount++] = tmp;
2806         }
2807       }
2808       rowcount++;
2809     }
2810 
2811     dlens[i]  = dcount;
2812     odlens[i] = odcount;
2813 
2814     /* zero out the mask elements we set */
2815     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2816     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2817   }
2818 
2819   /* create our matrix */
2820   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2821   ierr = MatSetType(A,type);CHKERRQ(ierr);
2822   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2823 
2824   /* the column indices in the file are sorted within each row, so let the matrix take advantage of that */
2825   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2826 
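  /* the read-and-ship protocol used above for the column indices is now repeated
     for the numerical values; each process inserts its own rows as they arrive */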
2827   if (!rank) {
2828     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2829     /* read in my part of the matrix numerical values  */
2830     nz = procsnz[0];
2831     vals = buf;
2832     mycols = ibuf;
2833     if (size == 1)  nz -= extra_rows;
2834     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2835     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2836 
2837     /* insert into matrix */
2838     jj      = rstart*bs;
2839     for (i=0; i<m; i++) {
2840       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2841       mycols += locrowlens[i];
2842       vals   += locrowlens[i];
2843       jj++;
2844     }
2845     /* read in the values for the other processors (except the last one) and ship them out */
2846     for (i=1; i<size-1; i++) {
2847       nz   = procsnz[i];
2848       vals = buf;
2849       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2850       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2851     }
2852     /* read in the values for the last processor; its extra padding rows are not on the disk */
2853     if (size != 1) {
2854       nz   = procsnz[size-1] - extra_rows;
2855       vals = buf;
2856       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2857       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2858       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2859     }
2860     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2861   } else {
2862     /* receive numeric values */
2863     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2864 
2865     /* receive the message of numerical values */
2866     vals   = buf;
2867     mycols = ibuf;
2868     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2869     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2870     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"number of values received does not match the row lengths read from file");
2871 
2872     /* insert into matrix */
2873     jj      = rstart*bs;
2874     for (i=0; i<m; i++) {
2875       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2876       mycols += locrowlens[i];
2877       vals   += locrowlens[i];
2878       jj++;
2879     }
2880   }
2881   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2882   ierr = PetscFree(buf);CHKERRQ(ierr);
2883   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2884   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2885   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2886   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2887   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2888   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2889 
2890   *newmat = A;
2891   PetscFunctionReturn(0);
2892 }
2893 
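/*
   Usage sketch (illustrative only, not part of the library source): this loader is
   normally reached through the generic MatLoad() interface rather than being called
   directly; the file name below is a placeholder.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/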
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2896 /*@
2897    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2898 
2899    Collective on Mat
2900 
2901    Input Parameters:
2902 +  mat  - the matrix
2903 -  fact - factor used in sizing the hash table
2904 
2905    Level: advanced
2906 
2907    Notes:
2908    This can also be set with the command line option: -mat_use_hash_table <fact>
2909 
2910 .keywords: matrix, hashtable, factor, HT
2911 
2912 .seealso: MatSetOption()
2913 @*/
2914 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2915 {
2916   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2917 
2918   PetscFunctionBegin;
2919   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2920   if (f) {
2921     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2922   }
2923   PetscFunctionReturn(0);
2924 }
2925 
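/*
   Usage sketch (the factor value is illustrative): the hash table is used only when
   the MAT_USE_HASH_TABLE option has been enabled on the matrix, so a typical calling
   sequence is

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.99);CHKERRQ(ierr);
*/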
2926 EXTERN_C_BEGIN
2927 #undef __FUNCT__
2928 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2929 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2930 {
2931   Mat_MPIBAIJ *baij;
2932 
2933   PetscFunctionBegin;
2934   baij = (Mat_MPIBAIJ*)mat->data;
2935   baij->ht_fact = fact;
2936   PetscFunctionReturn(0);
2937 }
2938 EXTERN_C_END
2939 
2940 #undef __FUNCT__
2941 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2942 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2943 {
2944   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2945   PetscFunctionBegin;
2946   *Ad     = a->A;
2947   *Ao     = a->B;
2948   *colmap = a->garray;
2949   PetscFunctionReturn(0);
2950 }
2951
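/*
   Usage sketch: retrieve the two sequential blocks that make up an MPIBAIJ matrix.
   Ad is the "diagonal" block (coupling local rows with local columns), Ao is the
   off-diagonal block, and colmap[i] gives the global block column corresponding to
   local block column i of Ao.

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/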