xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision df48e8d96c19df54efdde2c76f874a16fcafd8fe)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
16 
17 /*  UGLY, ugly, ugly
18    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
19    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
20    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
21    converts the entries into single precision and then calls ..._MatScalar() to put them
22    into the single precision data structures.
23 */
24 #if defined(PETSC_USE_MAT_SINGLE)
25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
30 #else
31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
32 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
34 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
36 #endif
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
41 {
42   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45   PetscScalar    *va,*vb;
46   Vec            vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non scable way since the
102    length of colmap equals the global matrix length.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
110   PetscErrorCode ierr;
111   PetscInt       nbs = B->nbs,i,bs=mat->bs;
112 
113   PetscFunctionBegin;
114 #if defined (PETSC_USE_CTABLE)
115   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
116   for (i=0; i<nbs; i++){
117     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
118   }
119 #else
120   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
121   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
123   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
124 #endif
125   PetscFunctionReturn(0);
126 }
127 
128 #define CHUNKSIZE  10
129 
130 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
131 { \
132  \
133     brow = row/bs;  \
134     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135     rmax = aimax[brow]; nrow = ailen[brow]; \
136       bcol = col/bs; \
137       ridx = row % bs; cidx = col % bs; \
138       low = 0; high = nrow; \
139       while (high-low > 3) { \
140         t = (low+high)/2; \
141         if (rp[t] > bcol) high = t; \
142         else              low  = t; \
143       } \
144       for (_i=low; _i<high; _i++) { \
145         if (rp[_i] > bcol) break; \
146         if (rp[_i] == bcol) { \
147           bap  = ap +  bs2*_i + bs*cidx + ridx; \
148           if (addv == ADD_VALUES) *bap += value;  \
149           else                    *bap  = value;  \
150           goto a_noinsert; \
151         } \
152       } \
153       if (a->nonew == 1) goto a_noinsert; \
154       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
155       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 
169 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
193       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
194       CHKMEMQ;\
195       N = nrow++ - 1;  \
196       /* shift up all the later entries in this row */ \
197       for (ii=N; ii>=_i; ii--) { \
198         rp[ii+1] = rp[ii]; \
199         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
200       } \
201       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
202       rp[_i]                      = bcol;  \
203       ap[bs2*_i + bs*cidx + ridx] = value;  \
204       b_noinsert:; \
205     bilen[brow] = nrow; \
206 }
207 
208 #if defined(PETSC_USE_MAT_SINGLE)
209 #undef __FUNCT__
210 #define __FUNCT__ "MatSetValues_MPIBAIJ"
211 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
212 {
213   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
214   PetscErrorCode ierr;
215   PetscInt       i,N = m*n;
216   MatScalar      *vsingle;
217 
218   PetscFunctionBegin;
219   if (N > b->setvalueslen) {
220     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
221     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
222     b->setvalueslen  = N;
223   }
224   vsingle = b->setvaluescopy;
225 
226   for (i=0; i<N; i++) {
227     vsingle[i] = v[i];
228   }
229   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
230   PetscFunctionReturn(0);
231 }
232 
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
235 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
238   PetscErrorCode ierr;
239   PetscInt       i,N = m*n*b->bs2;
240   MatScalar      *vsingle;
241 
242   PetscFunctionBegin;
243   if (N > b->setvalueslen) {
244     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
245     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
246     b->setvalueslen  = N;
247   }
248   vsingle = b->setvaluescopy;
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
258 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
281 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
282 {
283   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
284   PetscErrorCode ierr;
285   PetscInt       i,N = m*n*b->bs2;
286   MatScalar      *vsingle;
287 
288   PetscFunctionBegin;
289   if (N > b->setvalueslen) {
290     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
291     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
292     b->setvalueslen  = N;
293   }
294   vsingle = b->setvaluescopy;
295   for (i=0; i<N; i++) {
296     vsingle[i] = v[i];
297   }
298   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 #endif
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
305 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
306 {
307   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
308   MatScalar      value;
309   PetscTruth     roworiented = baij->roworiented;
310   PetscErrorCode ierr;
311   PetscInt       i,j,row,col;
312   PetscInt       rstart_orig=baij->rstart_bs;
313   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
314   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
315 
316   /* Some Variables required in the macro */
317   Mat            A = baij->A;
318   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
319   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
320   MatScalar      *aa=a->a;
321 
322   Mat            B = baij->B;
323   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
324   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
325   MatScalar      *ba=b->a;
326 
327   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
328   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
329   MatScalar      *ap,*bap;
330 
331   PetscFunctionBegin;
332   for (i=0; i<m; i++) {
333     if (im[i] < 0) continue;
334 #if defined(PETSC_USE_DEBUG)
335     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
336 #endif
337     if (im[i] >= rstart_orig && im[i] < rend_orig) {
338       row = im[i] - rstart_orig;
339       for (j=0; j<n; j++) {
340         if (in[j] >= cstart_orig && in[j] < cend_orig){
341           col = in[j] - cstart_orig;
342           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
343           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
344           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
345         } else if (in[j] < 0) continue;
346 #if defined(PETSC_USE_DEBUG)
347         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
348 #endif
349         else {
350           if (mat->was_assembled) {
351             if (!baij->colmap) {
352               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
353             }
354 #if defined (PETSC_USE_CTABLE)
355             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
356             col  = col - 1;
357 #else
358             col = baij->colmap[in[j]/bs] - 1;
359 #endif
360             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
361               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
362               col =  in[j];
363               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
364               B = baij->B;
365               b = (Mat_SeqBAIJ*)(B)->data;
366               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
367               ba=b->a;
368             } else col += in[j]%bs;
369           } else col = in[j];
370           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
371           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
372           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
373         }
374       }
375     } else {
376       if (!baij->donotstash) {
377         if (roworiented) {
378           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
379         } else {
380           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
381         }
382       }
383     }
384   }
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
391 {
392   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
393   const MatScalar *value;
394   MatScalar       *barray=baij->barray;
395   PetscTruth      roworiented = baij->roworiented;
396   PetscErrorCode  ierr;
397   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
398   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
399   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
400 
401   PetscFunctionBegin;
402   if(!barray) {
403     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
404     baij->barray = barray;
405   }
406 
407   if (roworiented) {
408     stepval = (n-1)*bs;
409   } else {
410     stepval = (m-1)*bs;
411   }
412   for (i=0; i<m; i++) {
413     if (im[i] < 0) continue;
414 #if defined(PETSC_USE_DEBUG)
415     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
416 #endif
417     if (im[i] >= rstart && im[i] < rend) {
418       row = im[i] - rstart;
419       for (j=0; j<n; j++) {
420         /* If NumCol = 1 then a copy is not required */
421         if ((roworiented) && (n == 1)) {
422           barray = (MatScalar*)v + i*bs2;
423         } else if((!roworiented) && (m == 1)) {
424           barray = (MatScalar*)v + j*bs2;
425         } else { /* Here a copy is required */
426           if (roworiented) {
427             value = v + i*(stepval+bs)*bs + j*bs;
428           } else {
429             value = v + j*(stepval+bs)*bs + i*bs;
430           }
431           for (ii=0; ii<bs; ii++,value+=stepval) {
432             for (jj=0; jj<bs; jj++) {
433               *barray++  = *value++;
434             }
435           }
436           barray -=bs2;
437         }
438 
439         if (in[j] >= cstart && in[j] < cend){
440           col  = in[j] - cstart;
441           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
442         }
443         else if (in[j] < 0) continue;
444 #if defined(PETSC_USE_DEBUG)
445         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
446 #endif
447         else {
448           if (mat->was_assembled) {
449             if (!baij->colmap) {
450               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
451             }
452 
453 #if defined(PETSC_USE_DEBUG)
454 #if defined (PETSC_USE_CTABLE)
455             { PetscInt data;
456               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
457               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
458             }
459 #else
460             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
461 #endif
462 #endif
463 #if defined (PETSC_USE_CTABLE)
464 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
465             col  = (col - 1)/bs;
466 #else
467             col = (baij->colmap[in[j]] - 1)/bs;
468 #endif
469             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
470               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
471               col =  in[j];
472             }
473           }
474           else col = in[j];
475           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
476         }
477       }
478     } else {
479       if (!baij->donotstash) {
480         if (roworiented) {
481           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
482         } else {
483           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
484         }
485       }
486     }
487   }
488   PetscFunctionReturn(0);
489 }
490 
491 #define HASH_KEY 0.6180339887
492 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
493 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
494 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
495 #undef __FUNCT__
496 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
497 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
498 {
499   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
500   PetscTruth     roworiented = baij->roworiented;
501   PetscErrorCode ierr;
502   PetscInt       i,j,row,col;
503   PetscInt       rstart_orig=baij->rstart_bs;
504   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
505   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
506   PetscReal      tmp;
507   MatScalar      **HD = baij->hd,value;
508 #if defined(PETSC_USE_DEBUG)
509   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
510 #endif
511 
512   PetscFunctionBegin;
513 
514   for (i=0; i<m; i++) {
515 #if defined(PETSC_USE_DEBUG)
516     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
517     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
518 #endif
519       row = im[i];
520     if (row >= rstart_orig && row < rend_orig) {
521       for (j=0; j<n; j++) {
522         col = in[j];
523         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
524         /* Look up PetscInto the Hash Table */
525         key = (row/bs)*Nbs+(col/bs)+1;
526         h1  = HASH(size,key,tmp);
527 
528 
529         idx = h1;
530 #if defined(PETSC_USE_DEBUG)
531         insert_ct++;
532         total_ct++;
533         if (HT[idx] != key) {
534           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
535           if (idx == size) {
536             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
537             if (idx == h1) {
538               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
539             }
540           }
541         }
542 #else
543         if (HT[idx] != key) {
544           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
545           if (idx == size) {
546             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
547             if (idx == h1) {
548               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
549             }
550           }
551         }
552 #endif
553         /* A HASH table entry is found, so insert the values at the correct address */
554         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
555         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
556       }
557     } else {
558       if (!baij->donotstash) {
559         if (roworiented) {
560           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
561         } else {
562           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
563         }
564       }
565     }
566   }
567 #if defined(PETSC_USE_DEBUG)
568   baij->ht_total_ct = total_ct;
569   baij->ht_insert_ct = insert_ct;
570 #endif
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
576 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
577 {
578   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
579   PetscTruth      roworiented = baij->roworiented;
580   PetscErrorCode  ierr;
581   PetscInt        i,j,ii,jj,row,col;
582   PetscInt        rstart=baij->rstart ;
583   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
584   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
585   PetscReal       tmp;
586   MatScalar       **HD = baij->hd,*baij_a;
587   const MatScalar *v_t,*value;
588 #if defined(PETSC_USE_DEBUG)
589   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
590 #endif
591 
592   PetscFunctionBegin;
593 
594   if (roworiented) {
595     stepval = (n-1)*bs;
596   } else {
597     stepval = (m-1)*bs;
598   }
599   for (i=0; i<m; i++) {
600 #if defined(PETSC_USE_DEBUG)
601     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
602     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
603 #endif
604     row   = im[i];
605     v_t   = v + i*bs2;
606     if (row >= rstart && row < rend) {
607       for (j=0; j<n; j++) {
608         col = in[j];
609 
610         /* Look up into the Hash Table */
611         key = row*Nbs+col+1;
612         h1  = HASH(size,key,tmp);
613 
614         idx = h1;
615 #if defined(PETSC_USE_DEBUG)
616         total_ct++;
617         insert_ct++;
618        if (HT[idx] != key) {
619           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
620           if (idx == size) {
621             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
622             if (idx == h1) {
623               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
624             }
625           }
626         }
627 #else
628         if (HT[idx] != key) {
629           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
630           if (idx == size) {
631             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
632             if (idx == h1) {
633               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
634             }
635           }
636         }
637 #endif
638         baij_a = HD[idx];
639         if (roworiented) {
640           /*value = v + i*(stepval+bs)*bs + j*bs;*/
641           /* value = v + (i*(stepval+bs)+j)*bs; */
642           value = v_t;
643           v_t  += bs;
644           if (addv == ADD_VALUES) {
645             for (ii=0; ii<bs; ii++,value+=stepval) {
646               for (jj=ii; jj<bs2; jj+=bs) {
647                 baij_a[jj]  += *value++;
648               }
649             }
650           } else {
651             for (ii=0; ii<bs; ii++,value+=stepval) {
652               for (jj=ii; jj<bs2; jj+=bs) {
653                 baij_a[jj]  = *value++;
654               }
655             }
656           }
657         } else {
658           value = v + j*(stepval+bs)*bs + i*bs;
659           if (addv == ADD_VALUES) {
660             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
661               for (jj=0; jj<bs; jj++) {
662                 baij_a[jj]  += *value++;
663               }
664             }
665           } else {
666             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
667               for (jj=0; jj<bs; jj++) {
668                 baij_a[jj]  = *value++;
669               }
670             }
671           }
672         }
673       }
674     } else {
675       if (!baij->donotstash) {
676         if (roworiented) {
677           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
678         } else {
679           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
680         }
681       }
682     }
683   }
684 #if defined(PETSC_USE_DEBUG)
685   baij->ht_total_ct = total_ct;
686   baij->ht_insert_ct = insert_ct;
687 #endif
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatGetValues_MPIBAIJ"
693 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
694 {
695   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
696   PetscErrorCode ierr;
697   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
698   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
699 
700   PetscFunctionBegin;
701   for (i=0; i<m; i++) {
702     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
703     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
704     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
705       row = idxm[i] - bsrstart;
706       for (j=0; j<n; j++) {
707         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
708         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
709         if (idxn[j] >= bscstart && idxn[j] < bscend){
710           col = idxn[j] - bscstart;
711           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
712         } else {
713           if (!baij->colmap) {
714             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
715           }
716 #if defined (PETSC_USE_CTABLE)
717           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
718           data --;
719 #else
720           data = baij->colmap[idxn[j]/bs]-1;
721 #endif
722           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
723           else {
724             col  = data + idxn[j]%bs;
725             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
726           }
727         }
728       }
729     } else {
730       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
731     }
732   }
733  PetscFunctionReturn(0);
734 }
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatNorm_MPIBAIJ"
738 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
739 {
740   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
741   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
742   PetscErrorCode ierr;
743   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
744   PetscReal      sum = 0.0;
745   MatScalar      *v;
746 
747   PetscFunctionBegin;
748   if (baij->size == 1) {
749     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
750   } else {
751     if (type == NORM_FROBENIUS) {
752       v = amat->a;
753       nz = amat->nz*bs2;
754       for (i=0; i<nz; i++) {
755 #if defined(PETSC_USE_COMPLEX)
756         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
757 #else
758         sum += (*v)*(*v); v++;
759 #endif
760       }
761       v = bmat->a;
762       nz = bmat->nz*bs2;
763       for (i=0; i<nz; i++) {
764 #if defined(PETSC_USE_COMPLEX)
765         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
766 #else
767         sum += (*v)*(*v); v++;
768 #endif
769       }
770       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
771       *nrm = sqrt(*nrm);
772     } else if (type == NORM_1) { /* max column sum */
773       PetscReal *tmp,*tmp2;
774       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
775       ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
776       tmp2 = tmp + mat->N;
777       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
778       v = amat->a; jj = amat->j;
779       for (i=0; i<amat->nz; i++) {
780         for (j=0; j<bs; j++){
781           col = bs*(cstart + *jj) + j; /* column index */
782           for (row=0; row<bs; row++){
783             tmp[col] += PetscAbsScalar(*v);  v++;
784           }
785         }
786         jj++;
787       }
788       v = bmat->a; jj = bmat->j;
789       for (i=0; i<bmat->nz; i++) {
790         for (j=0; j<bs; j++){
791           col = bs*garray[*jj] + j;
792           for (row=0; row<bs; row++){
793             tmp[col] += PetscAbsScalar(*v); v++;
794           }
795         }
796         jj++;
797       }
798       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
799       *nrm = 0.0;
800       for (j=0; j<mat->N; j++) {
801         if (tmp2[j] > *nrm) *nrm = tmp2[j];
802       }
803       ierr = PetscFree(tmp);CHKERRQ(ierr);
804     } else if (type == NORM_INFINITY) { /* max row sum */
805       PetscReal *sums;
806       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
807       sum = 0.0;
808       for (j=0; j<amat->mbs; j++) {
809         for (row=0; row<bs; row++) sums[row] = 0.0;
810         v = amat->a + bs2*amat->i[j];
811         nz = amat->i[j+1]-amat->i[j];
812         for (i=0; i<nz; i++) {
813           for (col=0; col<bs; col++){
814             for (row=0; row<bs; row++){
815               sums[row] += PetscAbsScalar(*v); v++;
816             }
817           }
818         }
819         v = bmat->a + bs2*bmat->i[j];
820         nz = bmat->i[j+1]-bmat->i[j];
821         for (i=0; i<nz; i++) {
822           for (col=0; col<bs; col++){
823             for (row=0; row<bs; row++){
824               sums[row] += PetscAbsScalar(*v); v++;
825             }
826           }
827         }
828         for (row=0; row<bs; row++){
829           if (sums[row] > sum) sum = sums[row];
830         }
831       }
832       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
833       ierr = PetscFree(sums);CHKERRQ(ierr);
834     } else {
835       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
836     }
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 /*
842   Creates the hash table, and sets the table
843   This table is created only once.
844   If new entried need to be added to the matrix
845   then the hash table has to be destroyed and
846   recreated.
847 */
848 #undef __FUNCT__
849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851 {
852   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
853   Mat            A = baij->A,B=baij->B;
854   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856   PetscErrorCode ierr;
857   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
858   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
859   PetscInt       *HT,key;
860   MatScalar      **HD;
861   PetscReal      tmp;
862 #if defined(PETSC_USE_DEBUG)
863   PetscInt       ct=0,max=0;
864 #endif
865 
866   PetscFunctionBegin;
867   baij->ht_size=(PetscInt)(factor*nz);
868   size = baij->ht_size;
869 
870   if (baij->ht) {
871     PetscFunctionReturn(0);
872   }
873 
874   /* Allocate Memory for Hash Table */
875   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
876   baij->ht = (PetscInt*)(baij->hd + size);
877   HD       = baij->hd;
878   HT       = baij->ht;
879 
880 
881   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
882 
883 
884   /* Loop Over A */
885   for (i=0; i<a->mbs; i++) {
886     for (j=ai[i]; j<ai[i+1]; j++) {
887       row = i+rstart;
888       col = aj[j]+cstart;
889 
890       key = row*Nbs + col + 1;
891       h1  = HASH(size,key,tmp);
892       for (k=0; k<size; k++){
893         if (!HT[(h1+k)%size]) {
894           HT[(h1+k)%size] = key;
895           HD[(h1+k)%size] = a->a + j*bs2;
896           break;
897 #if defined(PETSC_USE_DEBUG)
898         } else {
899           ct++;
900 #endif
901         }
902       }
903 #if defined(PETSC_USE_DEBUG)
904       if (k> max) max = k;
905 #endif
906     }
907   }
908   /* Loop Over B */
909   for (i=0; i<b->mbs; i++) {
910     for (j=bi[i]; j<bi[i+1]; j++) {
911       row = i+rstart;
912       col = garray[bj[j]];
913       key = row*Nbs + col + 1;
914       h1  = HASH(size,key,tmp);
915       for (k=0; k<size; k++){
916         if (!HT[(h1+k)%size]) {
917           HT[(h1+k)%size] = key;
918           HD[(h1+k)%size] = b->a + j*bs2;
919           break;
920 #if defined(PETSC_USE_DEBUG)
921         } else {
922           ct++;
923 #endif
924         }
925       }
926 #if defined(PETSC_USE_DEBUG)
927       if (k> max) max = k;
928 #endif
929     }
930   }
931 
932   /* Print Summary */
933 #if defined(PETSC_USE_DEBUG)
934   for (i=0,j=0; i<size; i++) {
935     if (HT[i]) {j++;}
936   }
937   ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
938 #endif
939   PetscFunctionReturn(0);
940 }
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
944 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
945 {
946   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
947   PetscErrorCode ierr;
948   PetscInt       nstash,reallocs;
949   InsertMode     addv;
950 
951   PetscFunctionBegin;
952   if (baij->donotstash) {
953     PetscFunctionReturn(0);
954   }
955 
956   /* make sure all processors are either in INSERTMODE or ADDMODE */
957   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
958   if (addv == (ADD_VALUES|INSERT_VALUES)) {
959     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
960   }
961   mat->insertmode = addv; /* in case this processor had no cache */
962 
963   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
964   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
965   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
966   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
967   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
968   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
974 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
975 {
976   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
977   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
978   PetscErrorCode ierr;
979   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
980   PetscInt       *row,*col,other_disassembled;
981   PetscTruth     r1,r2,r3;
982   MatScalar      *val;
983   InsertMode     addv = mat->insertmode;
984   PetscMPIInt    n;
985 
986   PetscFunctionBegin;
987   if (!baij->donotstash) {
988     while (1) {
989       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
990       if (!flg) break;
991 
992       for (i=0; i<n;) {
993         /* Now identify the consecutive vals belonging to the same row */
994         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995         if (j < n) ncols = j-i;
996         else       ncols = n-i;
997         /* Now assemble all these values with a single function call */
998         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999         i = j;
1000       }
1001     }
1002     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003     /* Now process the block-stash. Since the values are stashed column-oriented,
1004        set the roworiented flag to column oriented, and after MatSetValues()
1005        restore the original flags */
1006     r1 = baij->roworiented;
1007     r2 = a->roworiented;
1008     r3 = b->roworiented;
1009     baij->roworiented = PETSC_FALSE;
1010     a->roworiented    = PETSC_FALSE;
1011     b->roworiented    = PETSC_FALSE;
1012     while (1) {
1013       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014       if (!flg) break;
1015 
1016       for (i=0; i<n;) {
1017         /* Now identify the consecutive vals belonging to the same row */
1018         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019         if (j < n) ncols = j-i;
1020         else       ncols = n-i;
1021         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022         i = j;
1023       }
1024     }
1025     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026     baij->roworiented = r1;
1027     a->roworiented    = r2;
1028     b->roworiented    = r3;
1029   }
1030 
1031   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033 
1034   /* determine if any processor has disassembled, if so we must
1035      also disassemble ourselfs, in order that we may reassemble. */
1036   /*
1037      if nonzero structure of submatrix B cannot change then we know that
1038      no processor disassembled thus we can skip this stuff
1039   */
1040   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042     if (mat->was_assembled && !other_disassembled) {
1043       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1044     }
1045   }
1046 
1047   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1049   }
1050   b->compressedrow.use = PETSC_TRUE;
1051   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1052   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1053 
1054 #if defined(PETSC_USE_DEBUG)
1055   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1056     ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
1057     baij->ht_total_ct  = 0;
1058     baij->ht_insert_ct = 0;
1059   }
1060 #endif
1061   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1062     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1063     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1064     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1065   }
1066 
1067   if (baij->rowvalues) {
1068     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1069     baij->rowvalues = 0;
1070   }
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1076 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1077 {
1078   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1079   PetscErrorCode    ierr;
1080   PetscMPIInt       size = baij->size,rank = baij->rank;
1081   PetscInt          bs = mat->bs;
1082   PetscTruth        iascii,isdraw;
1083   PetscViewer       sviewer;
1084   PetscViewerFormat format;
1085 
1086   PetscFunctionBegin;
1087   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1088   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1089   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1090   if (iascii) {
1091     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1092     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1093       MatInfo info;
1094       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1095       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1096       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1097               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1098               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1099       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1100       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1101       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1102       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1103       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1104       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1105       PetscFunctionReturn(0);
1106     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1107       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1108       PetscFunctionReturn(0);
1109     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1110       PetscFunctionReturn(0);
1111     }
1112   }
1113 
1114   if (isdraw) {
1115     PetscDraw       draw;
1116     PetscTruth isnull;
1117     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1118     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1119   }
1120 
1121   if (size == 1) {
1122     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1123     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1124   } else {
1125     /* assemble the entire matrix onto first processor. */
1126     Mat         A;
1127     Mat_SeqBAIJ *Aloc;
1128     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1129     MatScalar   *a;
1130 
1131     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1132     /* Perhaps this should be the type of mat? */
1133     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
1134     if (!rank) {
1135       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1136     } else {
1137       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1138     }
1139     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1140     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1141     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1142 
1143     /* copy over the A part */
1144     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1145     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1146     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1147 
1148     for (i=0; i<mbs; i++) {
1149       rvals[0] = bs*(baij->rstart + i);
1150       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1151       for (j=ai[i]; j<ai[i+1]; j++) {
1152         col = (baij->cstart+aj[j])*bs;
1153         for (k=0; k<bs; k++) {
1154           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1155           col++; a += bs;
1156         }
1157       }
1158     }
1159     /* copy over the B part */
1160     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1161     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1162     for (i=0; i<mbs; i++) {
1163       rvals[0] = bs*(baij->rstart + i);
1164       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1165       for (j=ai[i]; j<ai[i+1]; j++) {
1166         col = baij->garray[aj[j]]*bs;
1167         for (k=0; k<bs; k++) {
1168           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1169           col++; a += bs;
1170         }
1171       }
1172     }
1173     ierr = PetscFree(rvals);CHKERRQ(ierr);
1174     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1175     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1176     /*
1177        Everyone has to call to draw the matrix since the graphics waits are
1178        synchronized across all processors that share the PetscDraw object
1179     */
1180     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1181     if (!rank) {
1182       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1183       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1184     }
1185     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1186     ierr = MatDestroy(A);CHKERRQ(ierr);
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatView_MPIBAIJ"
1193 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1194 {
1195   PetscErrorCode ierr;
1196   PetscTruth     iascii,isdraw,issocket,isbinary;
1197 
1198   PetscFunctionBegin;
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1200   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1202   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1203   if (iascii || isdraw || issocket || isbinary) {
1204     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1205   } else {
1206     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1213 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1214 {
1215   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1216   PetscErrorCode ierr;
1217 
1218   PetscFunctionBegin;
1219 #if defined(PETSC_USE_LOG)
1220   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
1221 #endif
1222   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1223   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1224   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1225   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1226   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1227 #if defined (PETSC_USE_CTABLE)
1228   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1229 #else
1230   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1231 #endif
1232   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1233   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1234   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1235   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1236   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1237   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1238 #if defined(PETSC_USE_MAT_SINGLE)
1239   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1240 #endif
1241   ierr = PetscFree(baij);CHKERRQ(ierr);
1242 
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1248   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1249   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatMult_MPIBAIJ"
1255 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1256 {
1257   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1258   PetscErrorCode ierr;
1259   PetscInt       nt;
1260 
1261   PetscFunctionBegin;
1262   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1263   if (nt != A->n) {
1264     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1265   }
1266   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1267   if (nt != A->m) {
1268     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1269   }
1270   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1272   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1273   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1274   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1280 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1281 {
1282   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1287   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1288   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1289   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1295 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1296 {
1297   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1298   PetscErrorCode ierr;
1299   PetscTruth     merged;
1300 
1301   PetscFunctionBegin;
1302   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1303   /* do nondiagonal part */
1304   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1305   if (!merged) {
1306     /* send it on its way */
1307     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1308     /* do local part */
1309     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1310     /* receive remote parts: note this assumes the values are not actually */
1311     /* inserted in yy until the next line */
1312     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1313   } else {
1314     /* do local part */
1315     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1316     /* send it on its way */
1317     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1318     /* values actually were received in the Begin() but we need to call this nop */
1319     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1320   }
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1326 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   /* do nondiagonal part */
1333   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1334   /* send it on its way */
1335   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1336   /* do local part */
1337   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1338   /* receive remote parts: note this assumes the values are not actually */
1339   /* inserted in yy until the next line, which is true for my implementation*/
1340   /* but is not perhaps always true. */
1341   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 /*
1346   This only works correctly for square matrices where the subblock A->A is the
1347    diagonal block
1348 */
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1351 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1352 {
1353   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1358   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatScale_MPIBAIJ"
1364 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1365 {
1366   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1371   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1377 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1378 {
1379   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1380   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1381   PetscErrorCode ierr;
1382   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1383   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1384   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1385 
1386   PetscFunctionBegin;
1387   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1388   mat->getrowactive = PETSC_TRUE;
1389 
1390   if (!mat->rowvalues && (idx || v)) {
1391     /*
1392         allocate enough space to hold information from the longest row.
1393     */
1394     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1395     PetscInt     max = 1,mbs = mat->mbs,tmp;
1396     for (i=0; i<mbs; i++) {
1397       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1398       if (max < tmp) { max = tmp; }
1399     }
1400     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1401     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1402   }
1403 
1404   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1405   lrow = row - brstart;
1406 
1407   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1408   if (!v)   {pvA = 0; pvB = 0;}
1409   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1410   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1411   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1412   nztot = nzA + nzB;
1413 
1414   cmap  = mat->garray;
1415   if (v  || idx) {
1416     if (nztot) {
1417       /* Sort by increasing column numbers, assuming A and B already sorted */
1418       PetscInt imark = -1;
1419       if (v) {
1420         *v = v_p = mat->rowvalues;
1421         for (i=0; i<nzB; i++) {
1422           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1423           else break;
1424         }
1425         imark = i;
1426         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1427         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1428       }
1429       if (idx) {
1430         *idx = idx_p = mat->rowindices;
1431         if (imark > -1) {
1432           for (i=0; i<imark; i++) {
1433             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1434           }
1435         } else {
1436           for (i=0; i<nzB; i++) {
1437             if (cmap[cworkB[i]/bs] < cstart)
1438               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1439             else break;
1440           }
1441           imark = i;
1442         }
1443         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1444         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1445       }
1446     } else {
1447       if (idx) *idx = 0;
1448       if (v)   *v   = 0;
1449     }
1450   }
1451   *nz = nztot;
1452   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1453   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1459 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1460 {
1461   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1462 
1463   PetscFunctionBegin;
1464   if (!baij->getrowactive) {
1465     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1466   }
1467   baij->getrowactive = PETSC_FALSE;
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1473 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1474 {
1475   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1480   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1486 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1487 {
1488   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1489   Mat            A = a->A,B = a->B;
1490   PetscErrorCode ierr;
1491   PetscReal      isend[5],irecv[5];
1492 
1493   PetscFunctionBegin;
1494   info->block_size     = (PetscReal)matin->bs;
1495   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1496   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1497   isend[3] = info->memory;  isend[4] = info->mallocs;
1498   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1499   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1500   isend[3] += info->memory;  isend[4] += info->mallocs;
1501   if (flag == MAT_LOCAL) {
1502     info->nz_used      = isend[0];
1503     info->nz_allocated = isend[1];
1504     info->nz_unneeded  = isend[2];
1505     info->memory       = isend[3];
1506     info->mallocs      = isend[4];
1507   } else if (flag == MAT_GLOBAL_MAX) {
1508     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1509     info->nz_used      = irecv[0];
1510     info->nz_allocated = irecv[1];
1511     info->nz_unneeded  = irecv[2];
1512     info->memory       = irecv[3];
1513     info->mallocs      = irecv[4];
1514   } else if (flag == MAT_GLOBAL_SUM) {
1515     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1516     info->nz_used      = irecv[0];
1517     info->nz_allocated = irecv[1];
1518     info->nz_unneeded  = irecv[2];
1519     info->memory       = irecv[3];
1520     info->mallocs      = irecv[4];
1521   } else {
1522     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1523   }
1524   info->rows_global       = (PetscReal)A->M;
1525   info->columns_global    = (PetscReal)A->N;
1526   info->rows_local        = (PetscReal)A->m;
1527   info->columns_local     = (PetscReal)A->N;
1528   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1529   info->fill_ratio_needed = 0;
1530   info->factor_mallocs    = 0;
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1536 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1537 {
1538   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   switch (op) {
1543   case MAT_NO_NEW_NONZERO_LOCATIONS:
1544   case MAT_YES_NEW_NONZERO_LOCATIONS:
1545   case MAT_COLUMNS_UNSORTED:
1546   case MAT_COLUMNS_SORTED:
1547   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1548   case MAT_KEEP_ZEROED_ROWS:
1549   case MAT_NEW_NONZERO_LOCATION_ERR:
1550     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1551     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1552     break;
1553   case MAT_ROW_ORIENTED:
1554     a->roworiented = PETSC_TRUE;
1555     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1556     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1557     break;
1558   case MAT_ROWS_SORTED:
1559   case MAT_ROWS_UNSORTED:
1560   case MAT_YES_NEW_DIAGONALS:
1561     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1562     break;
1563   case MAT_COLUMN_ORIENTED:
1564     a->roworiented = PETSC_FALSE;
1565     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1566     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1567     break;
1568   case MAT_IGNORE_OFF_PROC_ENTRIES:
1569     a->donotstash = PETSC_TRUE;
1570     break;
1571   case MAT_NO_NEW_DIAGONALS:
1572     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1573   case MAT_USE_HASH_TABLE:
1574     a->ht_flag = PETSC_TRUE;
1575     break;
1576   case MAT_SYMMETRIC:
1577   case MAT_STRUCTURALLY_SYMMETRIC:
1578   case MAT_HERMITIAN:
1579   case MAT_SYMMETRY_ETERNAL:
1580     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1581     break;
1582   case MAT_NOT_SYMMETRIC:
1583   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1584   case MAT_NOT_HERMITIAN:
1585   case MAT_NOT_SYMMETRY_ETERNAL:
1586     break;
1587   default:
1588     SETERRQ(PETSC_ERR_SUP,"unknown option");
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1595 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1596 {
1597   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1598   Mat_SeqBAIJ    *Aloc;
1599   Mat            B;
1600   PetscErrorCode ierr;
1601   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1602   PetscInt       bs=A->bs,mbs=baij->mbs;
1603   MatScalar      *a;
1604 
1605   PetscFunctionBegin;
1606   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1607   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1608   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1609   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1610   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1611 
1612   /* copy over the A part */
1613   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1614   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1615   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1616 
1617   for (i=0; i<mbs; i++) {
1618     rvals[0] = bs*(baij->rstart + i);
1619     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1620     for (j=ai[i]; j<ai[i+1]; j++) {
1621       col = (baij->cstart+aj[j])*bs;
1622       for (k=0; k<bs; k++) {
1623         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1624         col++; a += bs;
1625       }
1626     }
1627   }
1628   /* copy over the B part */
1629   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1630   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1631   for (i=0; i<mbs; i++) {
1632     rvals[0] = bs*(baij->rstart + i);
1633     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1634     for (j=ai[i]; j<ai[i+1]; j++) {
1635       col = baij->garray[aj[j]]*bs;
1636       for (k=0; k<bs; k++) {
1637         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1638         col++; a += bs;
1639       }
1640     }
1641   }
1642   ierr = PetscFree(rvals);CHKERRQ(ierr);
1643   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1645 
1646   if (matout) {
1647     *matout = B;
1648   } else {
1649     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1650   }
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1656 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1657 {
1658   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1659   Mat            a = baij->A,b = baij->B;
1660   PetscErrorCode ierr;
1661   PetscInt       s1,s2,s3;
1662 
1663   PetscFunctionBegin;
1664   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1665   if (rr) {
1666     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1667     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1668     /* Overlap communication with computation. */
1669     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1670   }
1671   if (ll) {
1672     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1673     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1674     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1675   }
1676   /* scale  the diagonal block */
1677   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1678 
1679   if (rr) {
1680     /* Do a scatter end and then right scale the off-diagonal block */
1681     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1682     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1683   }
1684 
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1690 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1691 {
1692   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1693   PetscErrorCode ierr;
1694   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1695   PetscInt       i,N,*rows,*owners = l->rowners;
1696   PetscInt       *nprocs,j,idx,nsends,row;
1697   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1698   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1699   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1700   MPI_Comm       comm = A->comm;
1701   MPI_Request    *send_waits,*recv_waits;
1702   MPI_Status     recv_status,*send_status;
1703   IS             istmp;
1704 #if defined(PETSC_DEBUG)
1705   PetscTruth     found = PETSC_FALSE;
1706 #endif
1707 
1708   PetscFunctionBegin;
1709   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1710   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1711 
1712   /*  first count number of contributors to each processor */
1713   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1714   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1715   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1716   j = 0;
1717   for (i=0; i<N; i++) {
1718     if (lastidx > (idx = rows[i])) j = 0;
1719     lastidx = idx;
1720     for (; j<size; j++) {
1721       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1722         nprocs[2*j]++;
1723         nprocs[2*j+1] = 1;
1724         owner[i] = j;
1725 #if defined(PETSC_DEBUG)
1726         found = PETSC_TRUE;
1727 #endif
1728         break;
1729       }
1730     }
1731 #if defined(PETSC_DEBUG)
1732     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1733     found = PETSC_FALSE;
1734 #endif
1735   }
1736   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1737 
1738   /* inform other processors of number of messages and max length*/
1739   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1740 
1741   /* post receives:   */
1742   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1743   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1744   for (i=0; i<nrecvs; i++) {
1745     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1746   }
1747 
1748   /* do sends:
1749      1) starts[i] gives the starting index in svalues for stuff going to
1750      the ith processor
1751   */
1752   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1753   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1754   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1755   starts[0]  = 0;
1756   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1757   for (i=0; i<N; i++) {
1758     svalues[starts[owner[i]]++] = rows[i];
1759   }
1760   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1761 
1762   starts[0] = 0;
1763   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1764   count = 0;
1765   for (i=0; i<size; i++) {
1766     if (nprocs[2*i+1]) {
1767       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1768     }
1769   }
1770   ierr = PetscFree(starts);CHKERRQ(ierr);
1771 
1772   base = owners[rank]*bs;
1773 
1774   /*  wait on receives */
1775   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1776   source = lens + nrecvs;
1777   count  = nrecvs; slen = 0;
1778   while (count) {
1779     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1780     /* unpack receives into our local space */
1781     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1782     source[imdex]  = recv_status.MPI_SOURCE;
1783     lens[imdex]    = n;
1784     slen          += n;
1785     count--;
1786   }
1787   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1788 
1789   /* move the data into the send scatter */
1790   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1791   count = 0;
1792   for (i=0; i<nrecvs; i++) {
1793     values = rvalues + i*nmax;
1794     for (j=0; j<lens[i]; j++) {
1795       lrows[count++] = values[j] - base;
1796     }
1797   }
1798   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1799   ierr = PetscFree(lens);CHKERRQ(ierr);
1800   ierr = PetscFree(owner);CHKERRQ(ierr);
1801   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1802 
1803   /* actually zap the local rows */
1804   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1805   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1806 
1807   /*
1808         Zero the required rows. If the "diagonal block" of the matrix
1809      is square and the user wishes to set the diagonal we use seperate
1810      code so that MatSetValues() is not called for each diagonal allocating
1811      new memory, thus calling lots of mallocs and slowing things down.
1812 
1813        Contributed by: Mathew Knepley
1814   */
1815   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1816   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1817   if (diag && (l->A->M == l->A->N)) {
1818     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1819   } else if (diag) {
1820     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1821     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1822       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1823 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1824     }
1825     for (i=0; i<slen; i++) {
1826       row  = lrows[i] + rstart_bs;
1827       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1828     }
1829     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1830     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1831   } else {
1832     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1833   }
1834 
1835   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1836   ierr = PetscFree(lrows);CHKERRQ(ierr);
1837 
1838   /* wait on sends */
1839   if (nsends) {
1840     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1841     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1842     ierr = PetscFree(send_status);CHKERRQ(ierr);
1843   }
1844   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1845   ierr = PetscFree(svalues);CHKERRQ(ierr);
1846 
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1852 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1853 {
1854   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1855   MPI_Comm          comm = A->comm;
1856   static PetscTruth called = PETSC_FALSE;
1857   PetscErrorCode    ierr;
1858 
1859   PetscFunctionBegin;
1860   if (!a->rank) {
1861     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1862   }
1863   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1864   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1865   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1871 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1872 {
1873   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1874   PetscErrorCode ierr;
1875 
1876   PetscFunctionBegin;
1877   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatEqual_MPIBAIJ"
1885 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1886 {
1887   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1888   Mat            a,b,c,d;
1889   PetscTruth     flg;
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   a = matA->A; b = matA->B;
1894   c = matB->A; d = matB->B;
1895 
1896   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1897   if (flg) {
1898     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1899   }
1900   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 
1905 #undef __FUNCT__
1906 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1907 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1908 {
1909   PetscErrorCode ierr;
1910 
1911   PetscFunctionBegin;
1912   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 /* -------------------------------------------------------------------*/
1917 static struct _MatOps MatOps_Values = {
1918        MatSetValues_MPIBAIJ,
1919        MatGetRow_MPIBAIJ,
1920        MatRestoreRow_MPIBAIJ,
1921        MatMult_MPIBAIJ,
1922 /* 4*/ MatMultAdd_MPIBAIJ,
1923        MatMultTranspose_MPIBAIJ,
1924        MatMultTransposeAdd_MPIBAIJ,
1925        0,
1926        0,
1927        0,
1928 /*10*/ 0,
1929        0,
1930        0,
1931        0,
1932        MatTranspose_MPIBAIJ,
1933 /*15*/ MatGetInfo_MPIBAIJ,
1934        MatEqual_MPIBAIJ,
1935        MatGetDiagonal_MPIBAIJ,
1936        MatDiagonalScale_MPIBAIJ,
1937        MatNorm_MPIBAIJ,
1938 /*20*/ MatAssemblyBegin_MPIBAIJ,
1939        MatAssemblyEnd_MPIBAIJ,
1940        0,
1941        MatSetOption_MPIBAIJ,
1942        MatZeroEntries_MPIBAIJ,
1943 /*25*/ MatZeroRows_MPIBAIJ,
1944        0,
1945        0,
1946        0,
1947        0,
1948 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1949        0,
1950        0,
1951        0,
1952        0,
1953 /*35*/ MatDuplicate_MPIBAIJ,
1954        0,
1955        0,
1956        0,
1957        0,
1958 /*40*/ 0,
1959        MatGetSubMatrices_MPIBAIJ,
1960        MatIncreaseOverlap_MPIBAIJ,
1961        MatGetValues_MPIBAIJ,
1962        0,
1963 /*45*/ MatPrintHelp_MPIBAIJ,
1964        MatScale_MPIBAIJ,
1965        0,
1966        0,
1967        0,
1968 /*50*/ 0,
1969        0,
1970        0,
1971        0,
1972        0,
1973 /*55*/ 0,
1974        0,
1975        MatSetUnfactored_MPIBAIJ,
1976        0,
1977        MatSetValuesBlocked_MPIBAIJ,
1978 /*60*/ 0,
1979        MatDestroy_MPIBAIJ,
1980        MatView_MPIBAIJ,
1981        MatGetPetscMaps_Petsc,
1982        0,
1983 /*65*/ 0,
1984        0,
1985        0,
1986        0,
1987        0,
1988 /*70*/ MatGetRowMax_MPIBAIJ,
1989        0,
1990        0,
1991        0,
1992        0,
1993 /*75*/ 0,
1994        0,
1995        0,
1996        0,
1997        0,
1998 /*80*/ 0,
1999        0,
2000        0,
2001        0,
2002        MatLoad_MPIBAIJ,
2003 /*85*/ 0,
2004        0,
2005        0,
2006        0,
2007        0,
2008 /*90*/ 0,
2009        0,
2010        0,
2011        0,
2012        0,
2013 /*95*/ 0,
2014        0,
2015        0,
2016        0};
2017 
2018 
2019 EXTERN_C_BEGIN
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2022 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2023 {
2024   PetscFunctionBegin;
2025   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2026   *iscopy = PETSC_FALSE;
2027   PetscFunctionReturn(0);
2028 }
2029 EXTERN_C_END
2030 
2031 EXTERN_C_BEGIN
2032 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2033 EXTERN_C_END
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2037 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2038 {
2039   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2040   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2041   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2042   const PetscInt *JJ;
2043   PetscScalar    *values;
2044   PetscErrorCode ierr;
2045 
2046   PetscFunctionBegin;
2047 #if defined(PETSC_OPT_g)
2048   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2049 #endif
2050   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2051   o_nnz = d_nnz + m;
2052 
2053   for (i=0; i<m; i++) {
2054     nnz     = I[i+1]- I[i];
2055     JJ      = J + I[i];
2056     nnz_max = PetscMax(nnz_max,nnz);
2057 #if defined(PETSC_OPT_g)
2058     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2059 #endif
2060     for (j=0; j<nnz; j++) {
2061       if (*JJ >= cstart) break;
2062       JJ++;
2063     }
2064     d = 0;
2065     for (; j<nnz; j++) {
2066       if (*JJ++ >= cend) break;
2067       d++;
2068     }
2069     d_nnz[i] = d;
2070     o_nnz[i] = nnz - d;
2071   }
2072   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2073   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2074 
2075   if (v) values = (PetscScalar*)v;
2076   else {
2077     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2078     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2079   }
2080 
2081   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2082   for (i=0; i<m; i++) {
2083     ii   = i + rstart;
2084     nnz  = I[i+1]- I[i];
2085     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2086   }
2087   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2088   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2089   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2090 
2091   if (!v) {
2092     ierr = PetscFree(values);CHKERRQ(ierr);
2093   }
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 #undef __FUNCT__
2098 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2099 /*@C
2100    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2101    (the default parallel PETSc format).
2102 
2103    Collective on MPI_Comm
2104 
2105    Input Parameters:
2106 +  A - the matrix
2107 .  i - the indices into j for the start of each local row (starts with zero)
2108 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2109 -  v - optional values in the matrix
2110 
2111    Level: developer
2112 
2113 .keywords: matrix, aij, compressed row, sparse, parallel
2114 
2115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2116 @*/
2117 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2118 {
2119   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2120 
2121   PetscFunctionBegin;
2122   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2123   if (f) {
2124     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2125   }
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 EXTERN_C_BEGIN
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2132 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2133 {
2134   Mat_MPIBAIJ    *b;
2135   PetscErrorCode ierr;
2136   PetscInt       i;
2137 
2138   PetscFunctionBegin;
2139   B->preallocated = PETSC_TRUE;
2140   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2141 
2142   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2143   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2144   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2145   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2146   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2147   if (d_nnz) {
2148   for (i=0; i<B->m/bs; i++) {
2149       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2150     }
2151   }
2152   if (o_nnz) {
2153     for (i=0; i<B->m/bs; i++) {
2154       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2155     }
2156   }
2157 
2158   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2159   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2160   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2161   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2162 
2163   b = (Mat_MPIBAIJ*)B->data;
2164   B->bs  = bs;
2165   b->bs2 = bs*bs;
2166   b->mbs = B->m/bs;
2167   b->nbs = B->n/bs;
2168   b->Mbs = B->M/bs;
2169   b->Nbs = B->N/bs;
2170 
2171   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2172   b->rowners[0]    = 0;
2173   for (i=2; i<=b->size; i++) {
2174     b->rowners[i] += b->rowners[i-1];
2175   }
2176   b->rstart    = b->rowners[b->rank];
2177   b->rend      = b->rowners[b->rank+1];
2178 
2179   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2180   b->cowners[0] = 0;
2181   for (i=2; i<=b->size; i++) {
2182     b->cowners[i] += b->cowners[i-1];
2183   }
2184   b->cstart    = b->cowners[b->rank];
2185   b->cend      = b->cowners[b->rank+1];
2186 
2187   for (i=0; i<=b->size; i++) {
2188     b->rowners_bs[i] = b->rowners[i]*bs;
2189   }
2190   b->rstart_bs = b->rstart*bs;
2191   b->rend_bs   = b->rend*bs;
2192   b->cstart_bs = b->cstart*bs;
2193   b->cend_bs   = b->cend*bs;
2194 
2195   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2196   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
2197   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2198   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2199   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2200   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2201   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
2202   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2203   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2204   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2205 
2206   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2207 
2208   PetscFunctionReturn(0);
2209 }
2210 EXTERN_C_END
2211 
2212 EXTERN_C_BEGIN
2213 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2214 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2215 EXTERN_C_END
2216 
2217 /*MC
2218    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2219 
2220    Options Database Keys:
2221 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2222 
2223   Level: beginner
2224 
2225 .seealso: MatCreateMPIBAIJ
2226 M*/
2227 
2228 EXTERN_C_BEGIN
2229 #undef __FUNCT__
2230 #define __FUNCT__ "MatCreate_MPIBAIJ"
2231 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2232 {
2233   Mat_MPIBAIJ    *b;
2234   PetscErrorCode ierr;
2235   PetscTruth     flg;
2236 
2237   PetscFunctionBegin;
2238   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2239   B->data = (void*)b;
2240 
2241 
2242   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2243   B->mapping    = 0;
2244   B->factor     = 0;
2245   B->assembled  = PETSC_FALSE;
2246 
2247   B->insertmode = NOT_SET_VALUES;
2248   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2249   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2250 
2251   /* build local table of row and column ownerships */
2252   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2253   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2254   b->cowners    = b->rowners + b->size + 2;
2255   b->rowners_bs = b->cowners + b->size + 2;
2256 
2257   /* build cache for off array entries formed */
2258   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2259   b->donotstash  = PETSC_FALSE;
2260   b->colmap      = PETSC_NULL;
2261   b->garray      = PETSC_NULL;
2262   b->roworiented = PETSC_TRUE;
2263 
2264 #if defined(PETSC_USE_MAT_SINGLE)
2265   /* stuff for MatSetValues_XXX in single precision */
2266   b->setvalueslen     = 0;
2267   b->setvaluescopy    = PETSC_NULL;
2268 #endif
2269 
2270   /* stuff used in block assembly */
2271   b->barray       = 0;
2272 
2273   /* stuff used for matrix vector multiply */
2274   b->lvec         = 0;
2275   b->Mvctx        = 0;
2276 
2277   /* stuff for MatGetRow() */
2278   b->rowindices   = 0;
2279   b->rowvalues    = 0;
2280   b->getrowactive = PETSC_FALSE;
2281 
2282   /* hash table stuff */
2283   b->ht           = 0;
2284   b->hd           = 0;
2285   b->ht_size      = 0;
2286   b->ht_flag      = PETSC_FALSE;
2287   b->ht_fact      = 0;
2288   b->ht_total_ct  = 0;
2289   b->ht_insert_ct = 0;
2290 
2291   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2292   if (flg) {
2293     PetscReal fact = 1.39;
2294     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2295     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2296     if (fact <= 1.0) fact = 1.39;
2297     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2298     ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2299   }
2300   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2301                                      "MatStoreValues_MPIBAIJ",
2302                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2303   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2304                                      "MatRetrieveValues_MPIBAIJ",
2305                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2306   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2307                                      "MatGetDiagonalBlock_MPIBAIJ",
2308                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2309   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2310                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2311                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2312   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2313 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2314 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2315   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2316                                      "MatDiagonalScaleLocal_MPIBAIJ",
2317                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2318   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2319                                      "MatSetHashTableFactor_MPIBAIJ",
2320                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2321   PetscFunctionReturn(0);
2322 }
2323 EXTERN_C_END
2324 
2325 /*MC
2326    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2327 
2328    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2329    and MATMPIBAIJ otherwise.
2330 
2331    Options Database Keys:
2332 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2333 
2334   Level: beginner
2335 
2336 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2337 M*/
2338 
2339 EXTERN_C_BEGIN
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatCreate_BAIJ"
2342 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2343 {
2344   PetscErrorCode ierr;
2345   PetscMPIInt    size;
2346 
2347   PetscFunctionBegin;
2348   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2349   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2350   if (size == 1) {
2351     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2352   } else {
2353     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2354   }
2355   PetscFunctionReturn(0);
2356 }
2357 EXTERN_C_END
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2361 /*@C
2362    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2363    (block compressed row).  For good matrix assembly performance
2364    the user should preallocate the matrix storage by setting the parameters
2365    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2366    performance can be increased by more than a factor of 50.
2367 
2368    Collective on Mat
2369 
2370    Input Parameters:
2371 +  A - the matrix
2372 .  bs   - size of blockk
2373 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2374            submatrix  (same for all local rows)
2375 .  d_nnz - array containing the number of block nonzeros in the various block rows
2376            of the in diagonal portion of the local (possibly different for each block
2377            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2378 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2379            submatrix (same for all local rows).
2380 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2381            off-diagonal portion of the local submatrix (possibly different for
2382            each block row) or PETSC_NULL.
2383 
2384    If the *_nnz parameter is given then the *_nz parameter is ignored
2385 
2386    Options Database Keys:
2387 .   -mat_no_unroll - uses code that does not unroll the loops in the
2388                      block calculations (much slower)
2389 .   -mat_block_size - size of the blocks to use
2390 
2391    Notes:
2392    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2393    than it must be used on all processors that share the object for that argument.
2394 
2395    Storage Information:
2396    For a square global matrix we define each processor's diagonal portion
2397    to be its local rows and the corresponding columns (a square submatrix);
2398    each processor's off-diagonal portion encompasses the remainder of the
2399    local matrix (a rectangular submatrix).
2400 
2401    The user can specify preallocated storage for the diagonal part of
2402    the local submatrix with either d_nz or d_nnz (not both).  Set
2403    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2404    memory allocation.  Likewise, specify preallocated storage for the
2405    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2406 
2407    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2408    the figure below we depict these three local rows and all columns (0-11).
2409 
2410 .vb
2411            0 1 2 3 4 5 6 7 8 9 10 11
2412           -------------------
2413    row 3  |  o o o d d d o o o o o o
2414    row 4  |  o o o d d d o o o o o o
2415    row 5  |  o o o d d d o o o o o o
2416           -------------------
2417 .ve
2418 
2419    Thus, any entries in the d locations are stored in the d (diagonal)
2420    submatrix, and any entries in the o locations are stored in the
2421    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2422    stored simply in the MATSEQBAIJ format for compressed row storage.
2423 
2424    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2425    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2426    In general, for PDE problems in which most nonzeros are near the diagonal,
2427    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2428    or you will get TERRIBLE performance; see the users' manual chapter on
2429    matrices.
2430 
2431    Level: intermediate
2432 
2433 .keywords: matrix, block, aij, compressed row, sparse, parallel
2434 
2435 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2436 @*/
2437 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2438 {
2439   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2440 
2441   PetscFunctionBegin;
2442   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2443   if (f) {
2444     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2445   }
2446   PetscFunctionReturn(0);
2447 }
2448 
2449 #undef __FUNCT__
2450 #define __FUNCT__ "MatCreateMPIBAIJ"
2451 /*@C
2452    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2453    (block compressed row).  For good matrix assembly performance
2454    the user should preallocate the matrix storage by setting the parameters
2455    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2456    performance can be increased by more than a factor of 50.
2457 
2458    Collective on MPI_Comm
2459 
2460    Input Parameters:
2461 +  comm - MPI communicator
2462 .  bs   - size of blockk
2463 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2464            This value should be the same as the local size used in creating the
2465            y vector for the matrix-vector product y = Ax.
2466 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2467            This value should be the same as the local size used in creating the
2468            x vector for the matrix-vector product y = Ax.
2469 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2470 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2471 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2472            submatrix  (same for all local rows)
2473 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2474            of the in diagonal portion of the local (possibly different for each block
2475            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2476 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2477            submatrix (same for all local rows).
2478 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2479            off-diagonal portion of the local submatrix (possibly different for
2480            each block row) or PETSC_NULL.
2481 
2482    Output Parameter:
2483 .  A - the matrix
2484 
2485    Options Database Keys:
2486 .   -mat_no_unroll - uses code that does not unroll the loops in the
2487                      block calculations (much slower)
2488 .   -mat_block_size - size of the blocks to use
2489 
2490    Notes:
2491    If the *_nnz parameter is given then the *_nz parameter is ignored
2492 
2493    A nonzero block is any block that as 1 or more nonzeros in it
2494 
2495    The user MUST specify either the local or global matrix dimensions
2496    (possibly both).
2497 
2498    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2499    than it must be used on all processors that share the object for that argument.
2500 
2501    Storage Information:
2502    For a square global matrix we define each processor's diagonal portion
2503    to be its local rows and the corresponding columns (a square submatrix);
2504    each processor's off-diagonal portion encompasses the remainder of the
2505    local matrix (a rectangular submatrix).
2506 
2507    The user can specify preallocated storage for the diagonal part of
2508    the local submatrix with either d_nz or d_nnz (not both).  Set
2509    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2510    memory allocation.  Likewise, specify preallocated storage for the
2511    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2512 
2513    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2514    the figure below we depict these three local rows and all columns (0-11).
2515 
2516 .vb
2517            0 1 2 3 4 5 6 7 8 9 10 11
2518           -------------------
2519    row 3  |  o o o d d d o o o o o o
2520    row 4  |  o o o d d d o o o o o o
2521    row 5  |  o o o d d d o o o o o o
2522           -------------------
2523 .ve
2524 
2525    Thus, any entries in the d locations are stored in the d (diagonal)
2526    submatrix, and any entries in the o locations are stored in the
2527    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2528    stored simply in the MATSEQBAIJ format for compressed row storage.
2529 
2530    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2531    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2532    In general, for PDE problems in which most nonzeros are near the diagonal,
2533    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2534    or you will get TERRIBLE performance; see the users' manual chapter on
2535    matrices.
2536 
2537    Level: intermediate
2538 
2539 .keywords: matrix, block, aij, compressed row, sparse, parallel
2540 
2541 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2542 @*/
2543 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2544 {
2545   PetscErrorCode ierr;
2546   PetscMPIInt    size;
2547 
2548   PetscFunctionBegin;
2549   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2550   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2551   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2552   if (size > 1) {
2553     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2554     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2555   } else {
2556     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2557     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2558   }
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2564 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2565 {
2566   Mat            mat;
2567   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2568   PetscErrorCode ierr;
2569   PetscInt       len=0;
2570 
2571   PetscFunctionBegin;
2572   *newmat       = 0;
2573   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2574   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
2575   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2576   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2577 
2578   mat->factor       = matin->factor;
2579   mat->preallocated = PETSC_TRUE;
2580   mat->assembled    = PETSC_TRUE;
2581   mat->insertmode   = NOT_SET_VALUES;
2582 
2583   a      = (Mat_MPIBAIJ*)mat->data;
2584   mat->bs  = matin->bs;
2585   a->bs2   = oldmat->bs2;
2586   a->mbs   = oldmat->mbs;
2587   a->nbs   = oldmat->nbs;
2588   a->Mbs   = oldmat->Mbs;
2589   a->Nbs   = oldmat->Nbs;
2590 
2591   a->rstart       = oldmat->rstart;
2592   a->rend         = oldmat->rend;
2593   a->cstart       = oldmat->cstart;
2594   a->cend         = oldmat->cend;
2595   a->size         = oldmat->size;
2596   a->rank         = oldmat->rank;
2597   a->donotstash   = oldmat->donotstash;
2598   a->roworiented  = oldmat->roworiented;
2599   a->rowindices   = 0;
2600   a->rowvalues    = 0;
2601   a->getrowactive = PETSC_FALSE;
2602   a->barray       = 0;
2603   a->rstart_bs    = oldmat->rstart_bs;
2604   a->rend_bs      = oldmat->rend_bs;
2605   a->cstart_bs    = oldmat->cstart_bs;
2606   a->cend_bs      = oldmat->cend_bs;
2607 
2608   /* hash table stuff */
2609   a->ht           = 0;
2610   a->hd           = 0;
2611   a->ht_size      = 0;
2612   a->ht_flag      = oldmat->ht_flag;
2613   a->ht_fact      = oldmat->ht_fact;
2614   a->ht_total_ct  = 0;
2615   a->ht_insert_ct = 0;
2616 
2617   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2618   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2619   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2620   if (oldmat->colmap) {
2621 #if defined (PETSC_USE_CTABLE)
2622   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2623 #else
2624   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2625   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2626   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2627 #endif
2628   } else a->colmap = 0;
2629 
2630   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2631     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2632     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2633     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2634   } else a->garray = 0;
2635 
2636   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2637   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2638   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2639   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2640 
2641   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2642   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2643   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2644   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2645   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2646   *newmat = mat;
2647 
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #include "petscsys.h"
2652 
2653 #undef __FUNCT__
2654 #define __FUNCT__ "MatLoad_MPIBAIJ"
2655 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2656 {
2657   Mat            A;
2658   PetscErrorCode ierr;
2659   int            fd;
2660   PetscInt       i,nz,j,rstart,rend;
2661   PetscScalar    *vals,*buf;
2662   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2663   MPI_Status     status;
2664   PetscMPIInt    rank,size,maxnz;
2665   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2666   PetscInt       *locrowlens,*procsnz = 0,*browners;
2667   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2668   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2669   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2670   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2671 
2672   PetscFunctionBegin;
2673   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2674 
2675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2676   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2677   if (!rank) {
2678     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2679     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2680     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2681   }
2682 
2683   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2684   M = header[1]; N = header[2];
2685 
2686   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2687 
2688   /*
2689      This code adds extra rows to make sure the number of rows is
2690      divisible by the blocksize
2691   */
2692   Mbs        = M/bs;
2693   extra_rows = bs - M + bs*Mbs;
2694   if (extra_rows == bs) extra_rows = 0;
2695   else                  Mbs++;
2696   if (extra_rows && !rank) {
2697     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2698   }
2699 
2700   /* determine ownership of all rows */
2701   mbs        = Mbs/size + ((Mbs % size) > rank);
2702   m          = mbs*bs;
2703   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2704   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2705 
2706   /* process 0 needs enough room for process with most rows */
2707   if (!rank) {
2708     mmax = rowners[1];
2709     for (i=2; i<size; i++) {
2710       mmax = PetscMax(mmax,rowners[i]);
2711     }
2712     mmax*=bs;
2713   } else mmax = m;
2714 
2715   rowners[0] = 0;
2716   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2717   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2718   rstart = rowners[rank];
2719   rend   = rowners[rank+1];
2720 
2721   /* distribute row lengths to all processors */
2722   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2723   if (!rank) {
2724     mend = m;
2725     if (size == 1) mend = mend - extra_rows;
2726     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2727     for (j=mend; j<m; j++) locrowlens[j] = 1;
2728     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2729     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2730     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2731     for (j=0; j<m; j++) {
2732       procsnz[0] += locrowlens[j];
2733     }
2734     for (i=1; i<size; i++) {
2735       mend = browners[i+1] - browners[i];
2736       if (i == size-1) mend = mend - extra_rows;
2737       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2738       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2739       /* calculate the number of nonzeros on each processor */
2740       for (j=0; j<browners[i+1]-browners[i]; j++) {
2741         procsnz[i] += rowlengths[j];
2742       }
2743       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2744     }
2745     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2746   } else {
2747     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2748   }
2749 
2750   if (!rank) {
2751     /* determine max buffer needed and allocate it */
2752     maxnz = procsnz[0];
2753     for (i=1; i<size; i++) {
2754       maxnz = PetscMax(maxnz,procsnz[i]);
2755     }
2756     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2757 
2758     /* read in my part of the matrix column indices  */
2759     nz     = procsnz[0];
2760     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2761     mycols = ibuf;
2762     if (size == 1)  nz -= extra_rows;
2763     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2764     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2765 
2766     /* read in every ones (except the last) and ship off */
2767     for (i=1; i<size-1; i++) {
2768       nz   = procsnz[i];
2769       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2770       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2771     }
2772     /* read in the stuff for the last proc */
2773     if (size != 1) {
2774       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2775       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2776       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2777       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2778     }
2779     ierr = PetscFree(cols);CHKERRQ(ierr);
2780   } else {
2781     /* determine buffer space needed for message */
2782     nz = 0;
2783     for (i=0; i<m; i++) {
2784       nz += locrowlens[i];
2785     }
2786     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2787     mycols = ibuf;
2788     /* receive message of column indices*/
2789     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2790     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2791     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2792   }
2793 
2794   /* loop over local rows, determining number of off diagonal entries */
2795   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2796   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2797   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2798   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2799   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2800   rowcount = 0; nzcount = 0;
2801   for (i=0; i<mbs; i++) {
2802     dcount  = 0;
2803     odcount = 0;
2804     for (j=0; j<bs; j++) {
2805       kmax = locrowlens[rowcount];
2806       for (k=0; k<kmax; k++) {
2807         tmp = mycols[nzcount++]/bs;
2808         if (!mask[tmp]) {
2809           mask[tmp] = 1;
2810           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2811           else masked1[dcount++] = tmp;
2812         }
2813       }
2814       rowcount++;
2815     }
2816 
2817     dlens[i]  = dcount;
2818     odlens[i] = odcount;
2819 
2820     /* zero out the mask elements we set */
2821     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2822     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2823   }
2824 
2825   /* create our matrix */
2826   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2827   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2828   ierr = MatSetType(A,type);CHKERRQ(ierr)
2829   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2830 
2831   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2832   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2833 
2834   if (!rank) {
2835     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2836     /* read in my part of the matrix numerical values  */
2837     nz = procsnz[0];
2838     vals = buf;
2839     mycols = ibuf;
2840     if (size == 1)  nz -= extra_rows;
2841     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2842     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2843 
2844     /* insert into matrix */
2845     jj      = rstart*bs;
2846     for (i=0; i<m; i++) {
2847       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2848       mycols += locrowlens[i];
2849       vals   += locrowlens[i];
2850       jj++;
2851     }
2852     /* read in other processors (except the last one) and ship out */
2853     for (i=1; i<size-1; i++) {
2854       nz   = procsnz[i];
2855       vals = buf;
2856       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2857       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2858     }
2859     /* the last proc */
2860     if (size != 1){
2861       nz   = procsnz[i] - extra_rows;
2862       vals = buf;
2863       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2864       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2865       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2866     }
2867     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2868   } else {
2869     /* receive numeric values */
2870     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2871 
2872     /* receive message of values*/
2873     vals   = buf;
2874     mycols = ibuf;
2875     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2876     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2877     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2878 
2879     /* insert into matrix */
2880     jj      = rstart*bs;
2881     for (i=0; i<m; i++) {
2882       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2883       mycols += locrowlens[i];
2884       vals   += locrowlens[i];
2885       jj++;
2886     }
2887   }
2888   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2889   ierr = PetscFree(buf);CHKERRQ(ierr);
2890   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2891   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2892   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2893   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2894   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2895   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2896 
2897   *newmat = A;
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 #undef __FUNCT__
2902 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2903 /*@
2904    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2905 
2906    Input Parameters:
2907 .  mat  - the matrix
2908 .  fact - factor
2909 
2910    Collective on Mat
2911 
2912    Level: advanced
2913 
2914   Notes:
2915    This can also be set by the command line option: -mat_use_hash_table fact
2916 
2917 .keywords: matrix, hashtable, factor, HT
2918 
2919 .seealso: MatSetOption()
2920 @*/
2921 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2922 {
2923   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2924 
2925   PetscFunctionBegin;
2926   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2927   if (f) {
2928     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2929   }
2930   PetscFunctionReturn(0);
2931 }
2932 
2933 EXTERN_C_BEGIN
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2936 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2937 {
2938   Mat_MPIBAIJ *baij;
2939 
2940   PetscFunctionBegin;
2941   baij = (Mat_MPIBAIJ*)mat->data;
2942   baij->ht_fact = fact;
2943   PetscFunctionReturn(0);
2944 }
2945 EXTERN_C_END
2946 
2947 #undef __FUNCT__
2948 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2949 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2950 {
2951   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2952   PetscFunctionBegin;
2953   *Ad     = a->A;
2954   *Ao     = a->B;
2955   *colmap = a->garray;
2956   PetscFunctionReturn(0);
2957 }
2958