xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 8bc8193efbc389280f83b3d41dffa9e2d23e2ace)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
16 /*  UGLY, ugly, ugly
17    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
18    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
19    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
20    converts the entries into single precision and then calls ..._MatScalar() to put them
21    into the single precision data structures.
22 */
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       i;
44   PetscScalar    *va,*vb;
45   Vec            vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 /*
98      Local utility routine that creates a mapping from the global column
99    number to the local number in the off-diagonal part of the local
100    storage of the matrix.  This is done in a non scable way since the
101    length of colmap equals the global matrix length.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
106 {
107   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
108   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
109   PetscErrorCode ierr;
110   PetscInt       nbs = B->nbs,i,bs=mat->bs;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
119   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
120   PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
126 
127 #define CHUNKSIZE  10
128 
129 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
130 { \
131  \
132     brow = row/bs;  \
133     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134     rmax = aimax[brow]; nrow = ailen[brow]; \
135       bcol = col/bs; \
136       ridx = row % bs; cidx = col % bs; \
137       low = 0; high = nrow; \
138       while (high-low > 3) { \
139         t = (low+high)/2; \
140         if (rp[t] > bcol) high = t; \
141         else              low  = t; \
142       } \
143       for (_i=low; _i<high; _i++) { \
144         if (rp[_i] > bcol) break; \
145         if (rp[_i] == bcol) { \
146           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147           if (addv == ADD_VALUES) *bap += value;  \
148           else                    *bap  = value;  \
149           goto a_noinsert; \
150         } \
151       } \
152       if (a->nonew == 1) goto a_noinsert; \
153       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
154       if (nrow >= rmax) { \
155         /* there is no extra room in row, therefore enlarge */ \
156         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
157         MatScalar *new_a; \
158  \
159         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
160  \
161         /* malloc new storage space */ \
162         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
163         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
164         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
165         new_i   = new_j + new_nz; \
166  \
167         /* copy over old data into new slots */ \
168         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
169         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
171         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
172         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
173         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
175         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
176                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
177         /* free up old matrix storage */ \
178         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179         if (!a->singlemalloc) { \
180           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182         } \
183         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
184         a->singlemalloc = PETSC_TRUE; \
185  \
186         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
189         a->maxnz += bs2*CHUNKSIZE; \
190         a->reallocs++; \
191         a->nz++; \
192       } \
193       N = nrow++ - 1;  \
194       /* shift up all the later entries in this row */ \
195       for (ii=N; ii>=_i; ii--) { \
196         rp[ii+1] = rp[ii]; \
197         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198       } \
199       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
200       rp[_i]                      = bcol;  \
201       ap[bs2*_i + bs*cidx + ridx] = value;  \
202       a_noinsert:; \
203     ailen[brow] = nrow; \
204 }
205 
206 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207 { \
208     brow = row/bs;  \
209     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210     rmax = bimax[brow]; nrow = bilen[brow]; \
211       bcol = col/bs; \
212       ridx = row % bs; cidx = col % bs; \
213       low = 0; high = nrow; \
214       while (high-low > 3) { \
215         t = (low+high)/2; \
216         if (rp[t] > bcol) high = t; \
217         else              low  = t; \
218       } \
219       for (_i=low; _i<high; _i++) { \
220         if (rp[_i] > bcol) break; \
221         if (rp[_i] == bcol) { \
222           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223           if (addv == ADD_VALUES) *bap += value;  \
224           else                    *bap  = value;  \
225           goto b_noinsert; \
226         } \
227       } \
228       if (b->nonew == 1) goto b_noinsert; \
229       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
230       if (nrow >= rmax) { \
231         /* there is no extra room in row, therefore enlarge */ \
232         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
233         MatScalar *new_a; \
234  \
235         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
236  \
237         /* malloc new storage space */ \
238         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
239         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
241         new_i   = new_j + new_nz; \
242  \
243         /* copy over old data into new slots */ \
244         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
245         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
247         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
249         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
252                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253         /* free up old matrix storage */ \
254         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255         if (!b->singlemalloc) { \
256           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258         } \
259         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
260         b->singlemalloc = PETSC_TRUE; \
261  \
262         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
265         b->maxnz += bs2*CHUNKSIZE; \
266         b->reallocs++; \
267         b->nz++; \
268       } \
269       N = nrow++ - 1;  \
270       /* shift up all the later entries in this row */ \
271       for (ii=N; ii>=_i; ii--) { \
272         rp[ii+1] = rp[ii]; \
273         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274       } \
275       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276       rp[_i]                      = bcol;  \
277       ap[bs2*_i + bs*cidx + ridx] = value;  \
278       b_noinsert:; \
279     bilen[brow] = nrow; \
280 }
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       i,N = m*n;
290   MatScalar      *vsingle;
291 
292   PetscFunctionBegin;
293   if (N > b->setvalueslen) {
294     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
295     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
296     b->setvalueslen  = N;
297   }
298   vsingle = b->setvaluescopy;
299 
300   for (i=0; i<N; i++) {
301     vsingle[i] = v[i];
302   }
303   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
310 {
311   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
312   PetscErrorCode ierr;
313   PetscInt       i,N = m*n*b->bs2;
314   MatScalar      *vsingle;
315 
316   PetscFunctionBegin;
317   if (N > b->setvalueslen) {
318     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
319     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
320     b->setvalueslen  = N;
321   }
322   vsingle = b->setvaluescopy;
323   for (i=0; i<N; i++) {
324     vsingle[i] = v[i];
325   }
326   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
333 {
334   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
335   PetscErrorCode ierr;
336   PetscInt       i,N = m*n;
337   MatScalar      *vsingle;
338 
339   PetscFunctionBegin;
340   if (N > b->setvalueslen) {
341     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
342     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
343     b->setvalueslen  = N;
344   }
345   vsingle = b->setvaluescopy;
346   for (i=0; i<N; i++) {
347     vsingle[i] = v[i];
348   }
349   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
356 {
357   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
358   PetscErrorCode ierr;
359   PetscInt       i,N = m*n*b->bs2;
360   MatScalar      *vsingle;
361 
362   PetscFunctionBegin;
363   if (N > b->setvalueslen) {
364     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
365     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
366     b->setvalueslen  = N;
367   }
368   vsingle = b->setvaluescopy;
369   for (i=0; i<N; i++) {
370     vsingle[i] = v[i];
371   }
372   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 #endif
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
380 {
381   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
382   MatScalar      value;
383   PetscTruth     roworiented = baij->roworiented;
384   PetscErrorCode ierr;
385   PetscInt       i,j,row,col;
386   PetscInt       rstart_orig=baij->rstart_bs;
387   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
388   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
389 
390   /* Some Variables required in the macro */
391   Mat            A = baij->A;
392   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
393   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
394   MatScalar      *aa=a->a;
395 
396   Mat            B = baij->B;
397   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
398   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
399   MatScalar      *ba=b->a;
400 
401   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
402   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
403   MatScalar      *ap,*bap;
404 
405   PetscFunctionBegin;
406   for (i=0; i<m; i++) {
407     if (im[i] < 0) continue;
408 #if defined(PETSC_USE_BOPT_g)
409     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
410 #endif
411     if (im[i] >= rstart_orig && im[i] < rend_orig) {
412       row = im[i] - rstart_orig;
413       for (j=0; j<n; j++) {
414         if (in[j] >= cstart_orig && in[j] < cend_orig){
415           col = in[j] - cstart_orig;
416           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
418           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_BOPT_g)
421         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!baij->colmap) {
426               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430             col  = col - 1;
431 #else
432             col = baij->colmap[in[j]/bs] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
435               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
438               B = baij->B;
439               b = (Mat_SeqBAIJ*)(B)->data;
440               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
441               ba=b->a;
442             } else col += in[j]%bs;
443           } else col = in[j];
444           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
445           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
446           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
447         }
448       }
449     } else {
450       if (!baij->donotstash) {
451         if (roworiented) {
452           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453         } else {
454           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
455         }
456       }
457     }
458   }
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
467   const MatScalar *value;
468   MatScalar       *barray=baij->barray;
469   PetscTruth      roworiented = baij->roworiented;
470   PetscErrorCode  ierr;
471   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
472   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
473   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
474 
475   PetscFunctionBegin;
476   if(!barray) {
477     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
478     baij->barray = barray;
479   }
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487     if (im[i] < 0) continue;
488 #if defined(PETSC_USE_BOPT_g)
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     if (im[i] >= rstart && im[i] < rend) {
492       row = im[i] - rstart;
493       for (j=0; j<n; j++) {
494         /* If NumCol = 1 then a copy is not required */
495         if ((roworiented) && (n == 1)) {
496           barray = (MatScalar*)v + i*bs2;
497         } else if((!roworiented) && (m == 1)) {
498           barray = (MatScalar*)v + j*bs2;
499         } else { /* Here a copy is required */
500           if (roworiented) {
501             value = v + i*(stepval+bs)*bs + j*bs;
502           } else {
503             value = v + j*(stepval+bs)*bs + i*bs;
504           }
505           for (ii=0; ii<bs; ii++,value+=stepval) {
506             for (jj=0; jj<bs; jj++) {
507               *barray++  = *value++;
508             }
509           }
510           barray -=bs2;
511         }
512 
513         if (in[j] >= cstart && in[j] < cend){
514           col  = in[j] - cstart;
515           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
516         }
517         else if (in[j] < 0) continue;
518 #if defined(PETSC_USE_BOPT_g)
519         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
520 #endif
521         else {
522           if (mat->was_assembled) {
523             if (!baij->colmap) {
524               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
525             }
526 
527 #if defined(PETSC_USE_BOPT_g)
528 #if defined (PETSC_USE_CTABLE)
529             { PetscInt data;
530               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
531               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
532             }
533 #else
534             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
535 #endif
536 #endif
537 #if defined (PETSC_USE_CTABLE)
538 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
539             col  = (col - 1)/bs;
540 #else
541             col = (baij->colmap[in[j]] - 1)/bs;
542 #endif
543             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
544               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
545               col =  in[j];
546             }
547           }
548           else col = in[j];
549           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
550         }
551       }
552     } else {
553       if (!baij->donotstash) {
554         if (roworiented) {
555           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
556         } else {
557           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
558         }
559       }
560     }
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 #define HASH_KEY 0.6180339887
566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
569 #undef __FUNCT__
570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
572 {
573   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
574   PetscTruth     roworiented = baij->roworiented;
575   PetscErrorCode ierr;
576   PetscInt       i,j,row,col;
577   PetscInt       rstart_orig=baij->rstart_bs;
578   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
579   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
580   PetscReal      tmp;
581   MatScalar      **HD = baij->hd,value;
582 #if defined(PETSC_USE_BOPT_g)
583   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
584 #endif
585 
586   PetscFunctionBegin;
587 
588   for (i=0; i<m; i++) {
589 #if defined(PETSC_USE_BOPT_g)
590     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
591     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
592 #endif
593       row = im[i];
594     if (row >= rstart_orig && row < rend_orig) {
595       for (j=0; j<n; j++) {
596         col = in[j];
597         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
598         /* Look up PetscInto the Hash Table */
599         key = (row/bs)*Nbs+(col/bs)+1;
600         h1  = HASH(size,key,tmp);
601 
602 
603         idx = h1;
604 #if defined(PETSC_USE_BOPT_g)
605         insert_ct++;
606         total_ct++;
607         if (HT[idx] != key) {
608           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
609           if (idx == size) {
610             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
611             if (idx == h1) {
612               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
613             }
614           }
615         }
616 #else
617         if (HT[idx] != key) {
618           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
619           if (idx == size) {
620             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
621             if (idx == h1) {
622               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
623             }
624           }
625         }
626 #endif
627         /* A HASH table entry is found, so insert the values at the correct address */
628         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
629         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
630       }
631     } else {
632       if (!baij->donotstash) {
633         if (roworiented) {
634           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
635         } else {
636           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
637         }
638       }
639     }
640   }
641 #if defined(PETSC_USE_BOPT_g)
642   baij->ht_total_ct = total_ct;
643   baij->ht_insert_ct = insert_ct;
644 #endif
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
651 {
652   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
653   PetscTruth      roworiented = baij->roworiented;
654   PetscErrorCode  ierr;
655   PetscInt        i,j,ii,jj,row,col;
656   PetscInt        rstart=baij->rstart ;
657   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
658   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
659   PetscReal       tmp;
660   MatScalar       **HD = baij->hd,*baij_a;
661   const MatScalar *v_t,*value;
662 #if defined(PETSC_USE_BOPT_g)
663   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
664 #endif
665 
666   PetscFunctionBegin;
667 
668   if (roworiented) {
669     stepval = (n-1)*bs;
670   } else {
671     stepval = (m-1)*bs;
672   }
673   for (i=0; i<m; i++) {
674 #if defined(PETSC_USE_BOPT_g)
675     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
676     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
677 #endif
678     row   = im[i];
679     v_t   = v + i*bs2;
680     if (row >= rstart && row < rend) {
681       for (j=0; j<n; j++) {
682         col = in[j];
683 
684         /* Look up into the Hash Table */
685         key = row*Nbs+col+1;
686         h1  = HASH(size,key,tmp);
687 
688         idx = h1;
689 #if defined(PETSC_USE_BOPT_g)
690         total_ct++;
691         insert_ct++;
692        if (HT[idx] != key) {
693           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
694           if (idx == size) {
695             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
696             if (idx == h1) {
697               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
698             }
699           }
700         }
701 #else
702         if (HT[idx] != key) {
703           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
704           if (idx == size) {
705             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
706             if (idx == h1) {
707               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
708             }
709           }
710         }
711 #endif
712         baij_a = HD[idx];
713         if (roworiented) {
714           /*value = v + i*(stepval+bs)*bs + j*bs;*/
715           /* value = v + (i*(stepval+bs)+j)*bs; */
716           value = v_t;
717           v_t  += bs;
718           if (addv == ADD_VALUES) {
719             for (ii=0; ii<bs; ii++,value+=stepval) {
720               for (jj=ii; jj<bs2; jj+=bs) {
721                 baij_a[jj]  += *value++;
722               }
723             }
724           } else {
725             for (ii=0; ii<bs; ii++,value+=stepval) {
726               for (jj=ii; jj<bs2; jj+=bs) {
727                 baij_a[jj]  = *value++;
728               }
729             }
730           }
731         } else {
732           value = v + j*(stepval+bs)*bs + i*bs;
733           if (addv == ADD_VALUES) {
734             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
735               for (jj=0; jj<bs; jj++) {
736                 baij_a[jj]  += *value++;
737               }
738             }
739           } else {
740             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
741               for (jj=0; jj<bs; jj++) {
742                 baij_a[jj]  = *value++;
743               }
744             }
745           }
746         }
747       }
748     } else {
749       if (!baij->donotstash) {
750         if (roworiented) {
751           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
752         } else {
753           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
754         }
755       }
756     }
757   }
758 #if defined(PETSC_USE_BOPT_g)
759   baij->ht_total_ct = total_ct;
760   baij->ht_insert_ct = insert_ct;
761 #endif
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "MatGetValues_MPIBAIJ"
767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
768 {
769   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
770   PetscErrorCode ierr;
771   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
772   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
773 
774   PetscFunctionBegin;
775   for (i=0; i<m; i++) {
776     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
777     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
778     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
779       row = idxm[i] - bsrstart;
780       for (j=0; j<n; j++) {
781         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
782         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
783         if (idxn[j] >= bscstart && idxn[j] < bscend){
784           col = idxn[j] - bscstart;
785           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
786         } else {
787           if (!baij->colmap) {
788             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
789           }
790 #if defined (PETSC_USE_CTABLE)
791           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
792           data --;
793 #else
794           data = baij->colmap[idxn[j]/bs]-1;
795 #endif
796           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
797           else {
798             col  = data + idxn[j]%bs;
799             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
800           }
801         }
802       }
803     } else {
804       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
805     }
806   }
807  PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatNorm_MPIBAIJ"
812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813 {
814   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
815   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816   PetscErrorCode ierr;
817   PetscInt       i,bs2=baij->bs2;
818   PetscReal      sum = 0.0;
819   MatScalar      *v;
820 
821   PetscFunctionBegin;
822   if (baij->size == 1) {
823     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824   } else {
825     if (type == NORM_FROBENIUS) {
826       v = amat->a;
827       for (i=0; i<amat->nz*bs2; i++) {
828 #if defined(PETSC_USE_COMPLEX)
829         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
830 #else
831         sum += (*v)*(*v); v++;
832 #endif
833       }
834       v = bmat->a;
835       for (i=0; i<bmat->nz*bs2; i++) {
836 #if defined(PETSC_USE_COMPLEX)
837         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
838 #else
839         sum += (*v)*(*v); v++;
840 #endif
841       }
842       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
843       *nrm = sqrt(*nrm);
844     } else {
845       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
846     }
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 
852 /*
853   Creates the hash table, and sets the table
854   This table is created only once.
855   If new entried need to be added to the matrix
856   then the hash table has to be destroyed and
857   recreated.
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
862 {
863   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
864   Mat            A = baij->A,B=baij->B;
865   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
866   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
867   PetscErrorCode ierr;
868   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
869   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
870   PetscInt       *HT,key;
871   MatScalar      **HD;
872   PetscReal      tmp;
873 #if defined(PETSC_USE_BOPT_g)
874   PetscInt       ct=0,max=0;
875 #endif
876 
877   PetscFunctionBegin;
878   baij->ht_size=(PetscInt)(factor*nz);
879   size = baij->ht_size;
880 
881   if (baij->ht) {
882     PetscFunctionReturn(0);
883   }
884 
885   /* Allocate Memory for Hash Table */
886   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
887   baij->ht = (PetscInt*)(baij->hd + size);
888   HD       = baij->hd;
889   HT       = baij->ht;
890 
891 
892   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
893 
894 
895   /* Loop Over A */
896   for (i=0; i<a->mbs; i++) {
897     for (j=ai[i]; j<ai[i+1]; j++) {
898       row = i+rstart;
899       col = aj[j]+cstart;
900 
901       key = row*Nbs + col + 1;
902       h1  = HASH(size,key,tmp);
903       for (k=0; k<size; k++){
904         if (!HT[(h1+k)%size]) {
905           HT[(h1+k)%size] = key;
906           HD[(h1+k)%size] = a->a + j*bs2;
907           break;
908 #if defined(PETSC_USE_BOPT_g)
909         } else {
910           ct++;
911 #endif
912         }
913       }
914 #if defined(PETSC_USE_BOPT_g)
915       if (k> max) max = k;
916 #endif
917     }
918   }
919   /* Loop Over B */
920   for (i=0; i<b->mbs; i++) {
921     for (j=bi[i]; j<bi[i+1]; j++) {
922       row = i+rstart;
923       col = garray[bj[j]];
924       key = row*Nbs + col + 1;
925       h1  = HASH(size,key,tmp);
926       for (k=0; k<size; k++){
927         if (!HT[(h1+k)%size]) {
928           HT[(h1+k)%size] = key;
929           HD[(h1+k)%size] = b->a + j*bs2;
930           break;
931 #if defined(PETSC_USE_BOPT_g)
932         } else {
933           ct++;
934 #endif
935         }
936       }
937 #if defined(PETSC_USE_BOPT_g)
938       if (k> max) max = k;
939 #endif
940     }
941   }
942 
943   /* Print Summary */
944 #if defined(PETSC_USE_BOPT_g)
945   for (i=0,j=0; i<size; i++) {
946     if (HT[i]) {j++;}
947   }
948   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
949 #endif
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
956 {
957   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
958   PetscErrorCode ierr;
959   PetscInt       nstash,reallocs;
960   InsertMode     addv;
961 
962   PetscFunctionBegin;
963   if (baij->donotstash) {
964     PetscFunctionReturn(0);
965   }
966 
967   /* make sure all processors are either in INSERTMODE or ADDMODE */
968   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
969   if (addv == (ADD_VALUES|INSERT_VALUES)) {
970     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
971   }
972   mat->insertmode = addv; /* in case this processor had no cache */
973 
974   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
975   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
976   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
977   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
978   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
979   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
985 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
986 {
987   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
988   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
989   PetscErrorCode ierr;
990   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
991   PetscInt       *row,*col,other_disassembled;
992   PetscTruth     r1,r2,r3;
993   MatScalar      *val;
994   InsertMode     addv = mat->insertmode;
995   PetscMPIInt    n;
996 
997   PetscFunctionBegin;
998   if (!baij->donotstash) {
999     while (1) {
1000       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1001       if (!flg) break;
1002 
1003       for (i=0; i<n;) {
1004         /* Now identify the consecutive vals belonging to the same row */
1005         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1006         if (j < n) ncols = j-i;
1007         else       ncols = n-i;
1008         /* Now assemble all these values with a single function call */
1009         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1010         i = j;
1011       }
1012     }
1013     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1014     /* Now process the block-stash. Since the values are stashed column-oriented,
1015        set the roworiented flag to column oriented, and after MatSetValues()
1016        restore the original flags */
1017     r1 = baij->roworiented;
1018     r2 = a->roworiented;
1019     r3 = b->roworiented;
1020     baij->roworiented = PETSC_FALSE;
1021     a->roworiented    = PETSC_FALSE;
1022     b->roworiented    = PETSC_FALSE;
1023     while (1) {
1024       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1025       if (!flg) break;
1026 
1027       for (i=0; i<n;) {
1028         /* Now identify the consecutive vals belonging to the same row */
1029         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1030         if (j < n) ncols = j-i;
1031         else       ncols = n-i;
1032         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1033         i = j;
1034       }
1035     }
1036     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1037     baij->roworiented = r1;
1038     a->roworiented    = r2;
1039     b->roworiented    = r3;
1040   }
1041 
1042   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1043   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1044 
1045   /* determine if any processor has disassembled, if so we must
1046      also disassemble ourselfs, in order that we may reassemble. */
1047   /*
1048      if nonzero structure of submatrix B cannot change then we know that
1049      no processor disassembled thus we can skip this stuff
1050   */
1051   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1052     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1053     if (mat->was_assembled && !other_disassembled) {
1054       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1055     }
1056   }
1057 
1058   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1059     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1060   }
1061   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1062   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1063 
1064 #if defined(PETSC_USE_BOPT_g)
1065   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1066     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1067     baij->ht_total_ct  = 0;
1068     baij->ht_insert_ct = 0;
1069   }
1070 #endif
1071   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1072     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1073     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1074     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1075   }
1076 
1077   if (baij->rowvalues) {
1078     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1079     baij->rowvalues = 0;
1080   }
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1086 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1087 {
1088   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1089   PetscErrorCode    ierr;
1090   PetscMPIInt       size = baij->size,rank = baij->rank;
1091   PetscInt          bs = mat->bs;
1092   PetscTruth        iascii,isdraw;
1093   PetscViewer       sviewer;
1094   PetscViewerFormat format;
1095 
1096   PetscFunctionBegin;
1097   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1098   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1099   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1100   if (iascii) {
1101     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1102     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1103       MatInfo info;
1104       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1105       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1106       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1107               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1108               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1109       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1110       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1111       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1112       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1113       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1114       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1115       PetscFunctionReturn(0);
1116     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1117       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1118       PetscFunctionReturn(0);
1119     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1120       PetscFunctionReturn(0);
1121     }
1122   }
1123 
1124   if (isdraw) {
1125     PetscDraw       draw;
1126     PetscTruth isnull;
1127     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1128     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1129   }
1130 
1131   if (size == 1) {
1132     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1133     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1134   } else {
1135     /* assemble the entire matrix onto first processor. */
1136     Mat         A;
1137     Mat_SeqBAIJ *Aloc;
1138     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1139     MatScalar   *a;
1140 
1141     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1142     /* Perhaps this should be the type of mat? */
1143     if (!rank) {
1144       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1145     } else {
1146       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
1147     }
1148     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1149     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1150     PetscLogObjectParent(mat,A);
1151 
1152     /* copy over the A part */
1153     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1154     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1155     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1156 
1157     for (i=0; i<mbs; i++) {
1158       rvals[0] = bs*(baij->rstart + i);
1159       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1160       for (j=ai[i]; j<ai[i+1]; j++) {
1161         col = (baij->cstart+aj[j])*bs;
1162         for (k=0; k<bs; k++) {
1163           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1164           col++; a += bs;
1165         }
1166       }
1167     }
1168     /* copy over the B part */
1169     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1170     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1171     for (i=0; i<mbs; i++) {
1172       rvals[0] = bs*(baij->rstart + i);
1173       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1174       for (j=ai[i]; j<ai[i+1]; j++) {
1175         col = baij->garray[aj[j]]*bs;
1176         for (k=0; k<bs; k++) {
1177           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1178           col++; a += bs;
1179         }
1180       }
1181     }
1182     ierr = PetscFree(rvals);CHKERRQ(ierr);
1183     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1184     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1185     /*
1186        Everyone has to call to draw the matrix since the graphics waits are
1187        synchronized across all processors that share the PetscDraw object
1188     */
1189     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1190     if (!rank) {
1191       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1192       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1193     }
1194     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1195     ierr = MatDestroy(A);CHKERRQ(ierr);
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatView_MPIBAIJ"
1202 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1203 {
1204   PetscErrorCode ierr;
1205   PetscTruth     iascii,isdraw,issocket,isbinary;
1206 
1207   PetscFunctionBegin;
1208   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1209   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1210   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1211   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1212   if (iascii || isdraw || issocket || isbinary) {
1213     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1214   } else {
1215     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1216   }
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1222 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1223 {
1224   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1225   PetscErrorCode ierr;
1226 
1227   PetscFunctionBegin;
1228 #if defined(PETSC_USE_LOG)
1229   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
1230 #endif
1231   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1232   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1233   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1234   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1235   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1236 #if defined (PETSC_USE_CTABLE)
1237   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1238 #else
1239   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1240 #endif
1241   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1242   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1243   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1244   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1245   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1246   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1247 #if defined(PETSC_USE_MAT_SINGLE)
1248   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1249 #endif
1250   ierr = PetscFree(baij);CHKERRQ(ierr);
1251 
1252   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1253   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1254   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1255   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1256   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1257   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1258   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatMult_MPIBAIJ"
1264 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1265 {
1266   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1267   PetscErrorCode ierr;
1268   PetscInt       nt;
1269 
1270   PetscFunctionBegin;
1271   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1272   if (nt != A->n) {
1273     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1274   }
1275   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1276   if (nt != A->m) {
1277     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1278   }
1279   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1280   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1281   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1282   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1283   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1289 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1290 {
1291   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1292   PetscErrorCode ierr;
1293 
1294   PetscFunctionBegin;
1295   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1296   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1297   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1298   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1304 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1305 {
1306   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1307   PetscErrorCode ierr;
1308 
1309   PetscFunctionBegin;
1310   /* do nondiagonal part */
1311   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1312   /* send it on its way */
1313   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1314   /* do local part */
1315   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1316   /* receive remote parts: note this assumes the values are not actually */
1317   /* inserted in yy until the next line, which is true for my implementation*/
1318   /* but is not perhaps always true. */
1319   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1325 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329 
1330   PetscFunctionBegin;
1331   /* do nondiagonal part */
1332   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1333   /* send it on its way */
1334   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1335   /* do local part */
1336   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1337   /* receive remote parts: note this assumes the values are not actually */
1338   /* inserted in yy until the next line, which is true for my implementation*/
1339   /* but is not perhaps always true. */
1340   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 /*
1345   This only works correctly for square matrices where the subblock A->A is the
1346    diagonal block
1347 */
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1350 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1351 {
1352   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1353   PetscErrorCode ierr;
1354 
1355   PetscFunctionBegin;
1356   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1357   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatScale_MPIBAIJ"
1363 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1364 {
1365   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1366   PetscErrorCode ierr;
1367 
1368   PetscFunctionBegin;
1369   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1370   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1376 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1377 {
1378   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1379   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1380   PetscErrorCode ierr;
1381   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1382   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1383   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1384 
1385   PetscFunctionBegin;
1386   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1387   mat->getrowactive = PETSC_TRUE;
1388 
1389   if (!mat->rowvalues && (idx || v)) {
1390     /*
1391         allocate enough space to hold information from the longest row.
1392     */
1393     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1394     PetscInt     max = 1,mbs = mat->mbs,tmp;
1395     for (i=0; i<mbs; i++) {
1396       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1397       if (max < tmp) { max = tmp; }
1398     }
1399     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1400     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1401   }
1402 
1403   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1404   lrow = row - brstart;
1405 
1406   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1407   if (!v)   {pvA = 0; pvB = 0;}
1408   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1409   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1410   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1411   nztot = nzA + nzB;
1412 
1413   cmap  = mat->garray;
1414   if (v  || idx) {
1415     if (nztot) {
1416       /* Sort by increasing column numbers, assuming A and B already sorted */
1417       PetscInt imark = -1;
1418       if (v) {
1419         *v = v_p = mat->rowvalues;
1420         for (i=0; i<nzB; i++) {
1421           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1422           else break;
1423         }
1424         imark = i;
1425         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1426         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1427       }
1428       if (idx) {
1429         *idx = idx_p = mat->rowindices;
1430         if (imark > -1) {
1431           for (i=0; i<imark; i++) {
1432             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1433           }
1434         } else {
1435           for (i=0; i<nzB; i++) {
1436             if (cmap[cworkB[i]/bs] < cstart)
1437               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1438             else break;
1439           }
1440           imark = i;
1441         }
1442         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1443         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1444       }
1445     } else {
1446       if (idx) *idx = 0;
1447       if (v)   *v   = 0;
1448     }
1449   }
1450   *nz = nztot;
1451   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1452   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1458 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1459 {
1460   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1461 
1462   PetscFunctionBegin;
1463   if (baij->getrowactive == PETSC_FALSE) {
1464     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1465   }
1466   baij->getrowactive = PETSC_FALSE;
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1472 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1473 {
1474   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1475   PetscErrorCode ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1479   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1485 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1486 {
1487   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1488   Mat            A = a->A,B = a->B;
1489   PetscErrorCode ierr;
1490   PetscReal      isend[5],irecv[5];
1491 
1492   PetscFunctionBegin;
1493   info->block_size     = (PetscReal)matin->bs;
1494   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1495   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1496   isend[3] = info->memory;  isend[4] = info->mallocs;
1497   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1498   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1499   isend[3] += info->memory;  isend[4] += info->mallocs;
1500   if (flag == MAT_LOCAL) {
1501     info->nz_used      = isend[0];
1502     info->nz_allocated = isend[1];
1503     info->nz_unneeded  = isend[2];
1504     info->memory       = isend[3];
1505     info->mallocs      = isend[4];
1506   } else if (flag == MAT_GLOBAL_MAX) {
1507     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1508     info->nz_used      = irecv[0];
1509     info->nz_allocated = irecv[1];
1510     info->nz_unneeded  = irecv[2];
1511     info->memory       = irecv[3];
1512     info->mallocs      = irecv[4];
1513   } else if (flag == MAT_GLOBAL_SUM) {
1514     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1515     info->nz_used      = irecv[0];
1516     info->nz_allocated = irecv[1];
1517     info->nz_unneeded  = irecv[2];
1518     info->memory       = irecv[3];
1519     info->mallocs      = irecv[4];
1520   } else {
1521     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1522   }
1523   info->rows_global       = (PetscReal)A->M;
1524   info->columns_global    = (PetscReal)A->N;
1525   info->rows_local        = (PetscReal)A->m;
1526   info->columns_local     = (PetscReal)A->N;
1527   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1528   info->fill_ratio_needed = 0;
1529   info->factor_mallocs    = 0;
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1535 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1536 {
1537   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   switch (op) {
1542   case MAT_NO_NEW_NONZERO_LOCATIONS:
1543   case MAT_YES_NEW_NONZERO_LOCATIONS:
1544   case MAT_COLUMNS_UNSORTED:
1545   case MAT_COLUMNS_SORTED:
1546   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1547   case MAT_KEEP_ZEROED_ROWS:
1548   case MAT_NEW_NONZERO_LOCATION_ERR:
1549     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1550     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1551     break;
1552   case MAT_ROW_ORIENTED:
1553     a->roworiented = PETSC_TRUE;
1554     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1555     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1556     break;
1557   case MAT_ROWS_SORTED:
1558   case MAT_ROWS_UNSORTED:
1559   case MAT_YES_NEW_DIAGONALS:
1560     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1561     break;
1562   case MAT_COLUMN_ORIENTED:
1563     a->roworiented = PETSC_FALSE;
1564     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1565     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1566     break;
1567   case MAT_IGNORE_OFF_PROC_ENTRIES:
1568     a->donotstash = PETSC_TRUE;
1569     break;
1570   case MAT_NO_NEW_DIAGONALS:
1571     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1572   case MAT_USE_HASH_TABLE:
1573     a->ht_flag = PETSC_TRUE;
1574     break;
1575   case MAT_SYMMETRIC:
1576   case MAT_STRUCTURALLY_SYMMETRIC:
1577   case MAT_HERMITIAN:
1578   case MAT_SYMMETRY_ETERNAL:
1579     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1580     break;
1581   case MAT_NOT_SYMMETRIC:
1582   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1583   case MAT_NOT_HERMITIAN:
1584   case MAT_NOT_SYMMETRY_ETERNAL:
1585     break;
1586   default:
1587     SETERRQ(PETSC_ERR_SUP,"unknown option");
1588   }
1589   PetscFunctionReturn(0);
1590 }
1591 
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1594 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1595 {
1596   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1597   Mat_SeqBAIJ    *Aloc;
1598   Mat            B;
1599   PetscErrorCode ierr;
1600   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1601   PetscInt       bs=A->bs,mbs=baij->mbs;
1602   MatScalar      *a;
1603 
1604   PetscFunctionBegin;
1605   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1606   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1607   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1608   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1609 
1610   /* copy over the A part */
1611   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1612   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1613   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1614 
1615   for (i=0; i<mbs; i++) {
1616     rvals[0] = bs*(baij->rstart + i);
1617     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1618     for (j=ai[i]; j<ai[i+1]; j++) {
1619       col = (baij->cstart+aj[j])*bs;
1620       for (k=0; k<bs; k++) {
1621         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1622         col++; a += bs;
1623       }
1624     }
1625   }
1626   /* copy over the B part */
1627   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1628   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1629   for (i=0; i<mbs; i++) {
1630     rvals[0] = bs*(baij->rstart + i);
1631     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       col = baij->garray[aj[j]]*bs;
1634       for (k=0; k<bs; k++) {
1635         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1636         col++; a += bs;
1637       }
1638     }
1639   }
1640   ierr = PetscFree(rvals);CHKERRQ(ierr);
1641   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643 
1644   if (matout) {
1645     *matout = B;
1646   } else {
1647     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1648   }
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1654 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1655 {
1656   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1657   Mat            a = baij->A,b = baij->B;
1658   PetscErrorCode ierr;
1659   PetscInt       s1,s2,s3;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1663   if (rr) {
1664     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1665     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1666     /* Overlap communication with computation. */
1667     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1668   }
1669   if (ll) {
1670     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1671     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1672     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1673   }
1674   /* scale  the diagonal block */
1675   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1676 
1677   if (rr) {
1678     /* Do a scatter end and then right scale the off-diagonal block */
1679     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1680     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1681   }
1682 
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1688 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1689 {
1690   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1691   PetscErrorCode ierr;
1692   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1693   PetscInt       i,N,*rows,*owners = l->rowners;
1694   PetscInt       *nprocs,j,idx,nsends,row;
1695   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1696   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
1697   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1698   MPI_Comm       comm = A->comm;
1699   MPI_Request    *send_waits,*recv_waits;
1700   MPI_Status     recv_status,*send_status;
1701   IS             istmp;
1702   PetscTruth     found;
1703 
1704   PetscFunctionBegin;
1705   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1706   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1707 
1708   /*  first count number of contributors to each processor */
1709   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1710   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1711   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1712   for (i=0; i<N; i++) {
1713     idx   = rows[i];
1714     found = PETSC_FALSE;
1715     for (j=0; j<size; j++) {
1716       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1717         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1718       }
1719     }
1720     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1721   }
1722   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1723 
1724   /* inform other processors of number of messages and max length*/
1725   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1726 
1727   /* post receives:   */
1728   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1729   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1730   for (i=0; i<nrecvs; i++) {
1731     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1732   }
1733 
1734   /* do sends:
1735      1) starts[i] gives the starting index in svalues for stuff going to
1736      the ith processor
1737   */
1738   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1739   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1740   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1741   starts[0]  = 0;
1742   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1743   for (i=0; i<N; i++) {
1744     svalues[starts[owner[i]]++] = rows[i];
1745   }
1746   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1747 
1748   starts[0] = 0;
1749   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1750   count = 0;
1751   for (i=0; i<size; i++) {
1752     if (nprocs[2*i+1]) {
1753       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1754     }
1755   }
1756   ierr = PetscFree(starts);CHKERRQ(ierr);
1757 
1758   base = owners[rank]*bs;
1759 
1760   /*  wait on receives */
1761   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1762   source = lens + nrecvs;
1763   count  = nrecvs; slen = 0;
1764   while (count) {
1765     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1766     /* unpack receives into our local space */
1767     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1768     source[imdex]  = recv_status.MPI_SOURCE;
1769     lens[imdex]    = n;
1770     slen          += n;
1771     count--;
1772   }
1773   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1774 
1775   /* move the data into the send scatter */
1776   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1777   count = 0;
1778   for (i=0; i<nrecvs; i++) {
1779     values = rvalues + i*nmax;
1780     for (j=0; j<lens[i]; j++) {
1781       lrows[count++] = values[j] - base;
1782     }
1783   }
1784   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1785   ierr = PetscFree(lens);CHKERRQ(ierr);
1786   ierr = PetscFree(owner);CHKERRQ(ierr);
1787   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1788 
1789   /* actually zap the local rows */
1790   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1791   PetscLogObjectParent(A,istmp);
1792 
1793   /*
1794         Zero the required rows. If the "diagonal block" of the matrix
1795      is square and the user wishes to set the diagonal we use seperate
1796      code so that MatSetValues() is not called for each diagonal allocating
1797      new memory, thus calling lots of mallocs and slowing things down.
1798 
1799        Contributed by: Mathew Knepley
1800   */
1801   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1802   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1803   if (diag && (l->A->M == l->A->N)) {
1804     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1805   } else if (diag) {
1806     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1807     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1808       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1809 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1810     }
1811     for (i=0; i<slen; i++) {
1812       row  = lrows[i] + rstart_bs;
1813       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1814     }
1815     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1816     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1817   } else {
1818     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1819   }
1820 
1821   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1822   ierr = PetscFree(lrows);CHKERRQ(ierr);
1823 
1824   /* wait on sends */
1825   if (nsends) {
1826     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1827     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1828     ierr = PetscFree(send_status);CHKERRQ(ierr);
1829   }
1830   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1831   ierr = PetscFree(svalues);CHKERRQ(ierr);
1832 
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1838 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1839 {
1840   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1841   MPI_Comm          comm = A->comm;
1842   static PetscTruth called = PETSC_FALSE;
1843   PetscErrorCode    ierr;
1844 
1845   PetscFunctionBegin;
1846   if (!a->rank) {
1847     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1848   }
1849   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1850   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1851   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1857 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1858 {
1859   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1860   PetscErrorCode ierr;
1861 
1862   PetscFunctionBegin;
1863   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatEqual_MPIBAIJ"
1871 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1872 {
1873   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1874   Mat            a,b,c,d;
1875   PetscTruth     flg;
1876   PetscErrorCode ierr;
1877 
1878   PetscFunctionBegin;
1879   a = matA->A; b = matA->B;
1880   c = matB->A; d = matB->B;
1881 
1882   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1883   if (flg == PETSC_TRUE) {
1884     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1885   }
1886   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 
1891 #undef __FUNCT__
1892 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1893 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1894 {
1895   PetscErrorCode ierr;
1896 
1897   PetscFunctionBegin;
1898   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 /* -------------------------------------------------------------------*/
1903 static struct _MatOps MatOps_Values = {
1904        MatSetValues_MPIBAIJ,
1905        MatGetRow_MPIBAIJ,
1906        MatRestoreRow_MPIBAIJ,
1907        MatMult_MPIBAIJ,
1908 /* 4*/ MatMultAdd_MPIBAIJ,
1909        MatMultTranspose_MPIBAIJ,
1910        MatMultTransposeAdd_MPIBAIJ,
1911        0,
1912        0,
1913        0,
1914 /*10*/ 0,
1915        0,
1916        0,
1917        0,
1918        MatTranspose_MPIBAIJ,
1919 /*15*/ MatGetInfo_MPIBAIJ,
1920        MatEqual_MPIBAIJ,
1921        MatGetDiagonal_MPIBAIJ,
1922        MatDiagonalScale_MPIBAIJ,
1923        MatNorm_MPIBAIJ,
1924 /*20*/ MatAssemblyBegin_MPIBAIJ,
1925        MatAssemblyEnd_MPIBAIJ,
1926        0,
1927        MatSetOption_MPIBAIJ,
1928        MatZeroEntries_MPIBAIJ,
1929 /*25*/ MatZeroRows_MPIBAIJ,
1930        0,
1931        0,
1932        0,
1933        0,
1934 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1935        0,
1936        0,
1937        0,
1938        0,
1939 /*35*/ MatDuplicate_MPIBAIJ,
1940        0,
1941        0,
1942        0,
1943        0,
1944 /*40*/ 0,
1945        MatGetSubMatrices_MPIBAIJ,
1946        MatIncreaseOverlap_MPIBAIJ,
1947        MatGetValues_MPIBAIJ,
1948        0,
1949 /*45*/ MatPrintHelp_MPIBAIJ,
1950        MatScale_MPIBAIJ,
1951        0,
1952        0,
1953        0,
1954 /*50*/ 0,
1955        0,
1956        0,
1957        0,
1958        0,
1959 /*55*/ 0,
1960        0,
1961        MatSetUnfactored_MPIBAIJ,
1962        0,
1963        MatSetValuesBlocked_MPIBAIJ,
1964 /*60*/ 0,
1965        MatDestroy_MPIBAIJ,
1966        MatView_MPIBAIJ,
1967        MatGetPetscMaps_Petsc,
1968        0,
1969 /*65*/ 0,
1970        0,
1971        0,
1972        0,
1973        0,
1974 /*70*/ MatGetRowMax_MPIBAIJ,
1975        0,
1976        0,
1977        0,
1978        0,
1979 /*75*/ 0,
1980        0,
1981        0,
1982        0,
1983        0,
1984 /*80*/ 0,
1985        0,
1986        0,
1987        0,
1988        MatLoad_MPIBAIJ,
1989 /*85*/ 0,
1990        0,
1991        0,
1992        0,
1993        0,
1994 /*90*/ 0,
1995        0,
1996        0,
1997        0,
1998        0,
1999 /*95*/ 0,
2000        0,
2001        0,
2002        0};
2003 
2004 
2005 EXTERN_C_BEGIN
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2008 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2009 {
2010   PetscFunctionBegin;
2011   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2012   *iscopy = PETSC_FALSE;
2013   PetscFunctionReturn(0);
2014 }
2015 EXTERN_C_END
2016 
2017 EXTERN_C_BEGIN
2018 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2019 EXTERN_C_END
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2023 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2024 {
2025   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2026   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2027   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2028   const PetscInt *JJ;
2029   PetscScalar    *values;
2030   PetscErrorCode ierr;
2031 
2032   PetscFunctionBegin;
2033 #if defined(PETSC_OPT_g)
2034   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2035 #endif
2036   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2037   o_nnz = d_nnz + m;
2038 
2039   for (i=0; i<m; i++) {
2040     nnz     = I[i+1]- I[i];
2041     JJ      = J + I[i];
2042     nnz_max = PetscMax(nnz_max,nnz);
2043 #if defined(PETSC_OPT_g)
2044     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2045 #endif
2046     for (j=0; j<nnz; j++) {
2047       if (*JJ >= cstart) break;
2048       JJ++;
2049     }
2050     d = 0;
2051     for (; j<nnz; j++) {
2052       if (*JJ++ >= cend) break;
2053       d++;
2054     }
2055     d_nnz[i] = d;
2056     o_nnz[i] = nnz - d;
2057   }
2058   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2059   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2060 
2061   if (v) values = (PetscScalar*)v;
2062   else {
2063     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2064     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2065   }
2066 
2067   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2068   for (i=0; i<m; i++) {
2069     ii   = i + rstart;
2070     nnz  = I[i+1]- I[i];
2071     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2072   }
2073   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2074   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2076 
2077   if (!v) {
2078     ierr = PetscFree(values);CHKERRQ(ierr);
2079   }
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2085 /*@C
2086    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2087    (the default parallel PETSc format).
2088 
2089    Collective on MPI_Comm
2090 
2091    Input Parameters:
2092 +  A - the matrix
2093 .  i - the indices into j for the start of each local row (starts with zero)
2094 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2095 -  v - optional values in the matrix
2096 
2097    Level: developer
2098 
2099 .keywords: matrix, aij, compressed row, sparse, parallel
2100 
2101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2102 @*/
2103 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2104 {
2105   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2106 
2107   PetscFunctionBegin;
2108   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2109   if (f) {
2110     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2111   }
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 EXTERN_C_BEGIN
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2118 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2119 {
2120   Mat_MPIBAIJ    *b;
2121   PetscErrorCode ierr;
2122   PetscInt       i;
2123 
2124   PetscFunctionBegin;
2125   B->preallocated = PETSC_TRUE;
2126   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2127 
2128   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2129   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2130   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2131   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2132   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2133   if (d_nnz) {
2134   for (i=0; i<B->m/bs; i++) {
2135       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2136     }
2137   }
2138   if (o_nnz) {
2139     for (i=0; i<B->m/bs; i++) {
2140       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2141     }
2142   }
2143 
2144   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2145   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2146   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2147   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2148 
2149   b = (Mat_MPIBAIJ*)B->data;
2150   B->bs  = bs;
2151   b->bs2 = bs*bs;
2152   b->mbs = B->m/bs;
2153   b->nbs = B->n/bs;
2154   b->Mbs = B->M/bs;
2155   b->Nbs = B->N/bs;
2156 
2157   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2158   b->rowners[0]    = 0;
2159   for (i=2; i<=b->size; i++) {
2160     b->rowners[i] += b->rowners[i-1];
2161   }
2162   b->rstart    = b->rowners[b->rank];
2163   b->rend      = b->rowners[b->rank+1];
2164 
2165   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2166   b->cowners[0] = 0;
2167   for (i=2; i<=b->size; i++) {
2168     b->cowners[i] += b->cowners[i-1];
2169   }
2170   b->cstart    = b->cowners[b->rank];
2171   b->cend      = b->cowners[b->rank+1];
2172 
2173   for (i=0; i<=b->size; i++) {
2174     b->rowners_bs[i] = b->rowners[i]*bs;
2175   }
2176   b->rstart_bs = b->rstart*bs;
2177   b->rend_bs   = b->rend*bs;
2178   b->cstart_bs = b->cstart*bs;
2179   b->cend_bs   = b->cend*bs;
2180 
2181   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2182   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2183   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2184   PetscLogObjectParent(B,b->A);
2185   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2186   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2187   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2188   PetscLogObjectParent(B,b->B);
2189 
2190   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2191 
2192   PetscFunctionReturn(0);
2193 }
2194 EXTERN_C_END
2195 
2196 EXTERN_C_BEGIN
2197 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2198 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2199 EXTERN_C_END
2200 
2201 /*MC
2202    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2203 
2204    Options Database Keys:
2205 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2206 
2207   Level: beginner
2208 
2209 .seealso: MatCreateMPIBAIJ
2210 M*/
2211 
2212 EXTERN_C_BEGIN
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatCreate_MPIBAIJ"
2215 PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2216 {
2217   Mat_MPIBAIJ    *b;
2218   PetscErrorCode ierr;
2219   PetscTruth     flg;
2220 
2221   PetscFunctionBegin;
2222   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2223   B->data = (void*)b;
2224 
2225   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2226   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2227   B->mapping    = 0;
2228   B->factor     = 0;
2229   B->assembled  = PETSC_FALSE;
2230 
2231   B->insertmode = NOT_SET_VALUES;
2232   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2233   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2234 
2235   /* build local table of row and column ownerships */
2236   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2237   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2238   b->cowners    = b->rowners + b->size + 2;
2239   b->rowners_bs = b->cowners + b->size + 2;
2240 
2241   /* build cache for off array entries formed */
2242   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2243   b->donotstash  = PETSC_FALSE;
2244   b->colmap      = PETSC_NULL;
2245   b->garray      = PETSC_NULL;
2246   b->roworiented = PETSC_TRUE;
2247 
2248 #if defined(PETSC_USE_MAT_SINGLE)
2249   /* stuff for MatSetValues_XXX in single precision */
2250   b->setvalueslen     = 0;
2251   b->setvaluescopy    = PETSC_NULL;
2252 #endif
2253 
2254   /* stuff used in block assembly */
2255   b->barray       = 0;
2256 
2257   /* stuff used for matrix vector multiply */
2258   b->lvec         = 0;
2259   b->Mvctx        = 0;
2260 
2261   /* stuff for MatGetRow() */
2262   b->rowindices   = 0;
2263   b->rowvalues    = 0;
2264   b->getrowactive = PETSC_FALSE;
2265 
2266   /* hash table stuff */
2267   b->ht           = 0;
2268   b->hd           = 0;
2269   b->ht_size      = 0;
2270   b->ht_flag      = PETSC_FALSE;
2271   b->ht_fact      = 0;
2272   b->ht_total_ct  = 0;
2273   b->ht_insert_ct = 0;
2274 
2275   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2276   if (flg) {
2277     PetscReal fact = 1.39;
2278     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2279     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2280     if (fact <= 1.0) fact = 1.39;
2281     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2282     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2283   }
2284   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2285                                      "MatStoreValues_MPIBAIJ",
2286                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2287   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2288                                      "MatRetrieveValues_MPIBAIJ",
2289                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2290   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2291                                      "MatGetDiagonalBlock_MPIBAIJ",
2292                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2293   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2294                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2295                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2296   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2297 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2298 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2299   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2300                                      "MatDiagonalScaleLocal_MPIBAIJ",
2301                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2302   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2303                                      "MatSetHashTableFactor_MPIBAIJ",
2304                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 EXTERN_C_END
2308 
2309 /*MC
2310    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2311 
2312    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2313    and MATMPIBAIJ otherwise.
2314 
2315    Options Database Keys:
2316 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2317 
2318   Level: beginner
2319 
2320 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2321 M*/
2322 
2323 EXTERN_C_BEGIN
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatCreate_BAIJ"
2326 PetscErrorCode MatCreate_BAIJ(Mat A)
2327 {
2328   PetscErrorCode ierr;
2329   PetscMPIInt    size;
2330 
2331   PetscFunctionBegin;
2332   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2333   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2334   if (size == 1) {
2335     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2336   } else {
2337     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2338   }
2339   PetscFunctionReturn(0);
2340 }
2341 EXTERN_C_END
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2345 /*@C
2346    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2347    (block compressed row).  For good matrix assembly performance
2348    the user should preallocate the matrix storage by setting the parameters
2349    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2350    performance can be increased by more than a factor of 50.
2351 
2352    Collective on Mat
2353 
2354    Input Parameters:
2355 +  A - the matrix
2356 .  bs   - size of blockk
2357 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2358            submatrix  (same for all local rows)
2359 .  d_nnz - array containing the number of block nonzeros in the various block rows
2360            of the in diagonal portion of the local (possibly different for each block
2361            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2362 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2363            submatrix (same for all local rows).
2364 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2365            off-diagonal portion of the local submatrix (possibly different for
2366            each block row) or PETSC_NULL.
2367 
2368    If the *_nnz parameter is given then the *_nz parameter is ignored
2369 
2370    Options Database Keys:
2371 .   -mat_no_unroll - uses code that does not unroll the loops in the
2372                      block calculations (much slower)
2373 .   -mat_block_size - size of the blocks to use
2374 
2375    Notes:
2376    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2377    than it must be used on all processors that share the object for that argument.
2378 
2379    Storage Information:
2380    For a square global matrix we define each processor's diagonal portion
2381    to be its local rows and the corresponding columns (a square submatrix);
2382    each processor's off-diagonal portion encompasses the remainder of the
2383    local matrix (a rectangular submatrix).
2384 
2385    The user can specify preallocated storage for the diagonal part of
2386    the local submatrix with either d_nz or d_nnz (not both).  Set
2387    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2388    memory allocation.  Likewise, specify preallocated storage for the
2389    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2390 
2391    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2392    the figure below we depict these three local rows and all columns (0-11).
2393 
2394 .vb
2395            0 1 2 3 4 5 6 7 8 9 10 11
2396           -------------------
2397    row 3  |  o o o d d d o o o o o o
2398    row 4  |  o o o d d d o o o o o o
2399    row 5  |  o o o d d d o o o o o o
2400           -------------------
2401 .ve
2402 
2403    Thus, any entries in the d locations are stored in the d (diagonal)
2404    submatrix, and any entries in the o locations are stored in the
2405    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2406    stored simply in the MATSEQBAIJ format for compressed row storage.
2407 
2408    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2409    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2410    In general, for PDE problems in which most nonzeros are near the diagonal,
2411    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2412    or you will get TERRIBLE performance; see the users' manual chapter on
2413    matrices.
2414 
2415    Level: intermediate
2416 
2417 .keywords: matrix, block, aij, compressed row, sparse, parallel
2418 
2419 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2420 @*/
2421 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2422 {
2423   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2424 
2425   PetscFunctionBegin;
2426   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2427   if (f) {
2428     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2429   }
2430   PetscFunctionReturn(0);
2431 }
2432 
2433 #undef __FUNCT__
2434 #define __FUNCT__ "MatCreateMPIBAIJ"
2435 /*@C
2436    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2437    (block compressed row).  For good matrix assembly performance
2438    the user should preallocate the matrix storage by setting the parameters
2439    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2440    performance can be increased by more than a factor of 50.
2441 
2442    Collective on MPI_Comm
2443 
2444    Input Parameters:
2445 +  comm - MPI communicator
2446 .  bs   - size of blockk
2447 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2448            This value should be the same as the local size used in creating the
2449            y vector for the matrix-vector product y = Ax.
2450 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2451            This value should be the same as the local size used in creating the
2452            x vector for the matrix-vector product y = Ax.
2453 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2454 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2455 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2456            submatrix  (same for all local rows)
2457 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2458            of the in diagonal portion of the local (possibly different for each block
2459            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2460 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2461            submatrix (same for all local rows).
2462 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2463            off-diagonal portion of the local submatrix (possibly different for
2464            each block row) or PETSC_NULL.
2465 
2466    Output Parameter:
2467 .  A - the matrix
2468 
2469    Options Database Keys:
2470 .   -mat_no_unroll - uses code that does not unroll the loops in the
2471                      block calculations (much slower)
2472 .   -mat_block_size - size of the blocks to use
2473 
2474    Notes:
2475    If the *_nnz parameter is given then the *_nz parameter is ignored
2476 
2477    A nonzero block is any block that as 1 or more nonzeros in it
2478 
2479    The user MUST specify either the local or global matrix dimensions
2480    (possibly both).
2481 
2482    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2483    than it must be used on all processors that share the object for that argument.
2484 
2485    Storage Information:
2486    For a square global matrix we define each processor's diagonal portion
2487    to be its local rows and the corresponding columns (a square submatrix);
2488    each processor's off-diagonal portion encompasses the remainder of the
2489    local matrix (a rectangular submatrix).
2490 
2491    The user can specify preallocated storage for the diagonal part of
2492    the local submatrix with either d_nz or d_nnz (not both).  Set
2493    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2494    memory allocation.  Likewise, specify preallocated storage for the
2495    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2496 
2497    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2498    the figure below we depict these three local rows and all columns (0-11).
2499 
2500 .vb
2501            0 1 2 3 4 5 6 7 8 9 10 11
2502           -------------------
2503    row 3  |  o o o d d d o o o o o o
2504    row 4  |  o o o d d d o o o o o o
2505    row 5  |  o o o d d d o o o o o o
2506           -------------------
2507 .ve
2508 
2509    Thus, any entries in the d locations are stored in the d (diagonal)
2510    submatrix, and any entries in the o locations are stored in the
2511    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2512    stored simply in the MATSEQBAIJ format for compressed row storage.
2513 
2514    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2515    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2516    In general, for PDE problems in which most nonzeros are near the diagonal,
2517    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2518    or you will get TERRIBLE performance; see the users' manual chapter on
2519    matrices.
2520 
2521    Level: intermediate
2522 
2523 .keywords: matrix, block, aij, compressed row, sparse, parallel
2524 
2525 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2526 @*/
2527 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2528 {
2529   PetscErrorCode ierr;
2530   PetscMPIInt    size;
2531 
2532   PetscFunctionBegin;
2533   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2534   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2535   if (size > 1) {
2536     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2537     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2538   } else {
2539     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2540     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2541   }
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 #undef __FUNCT__
2546 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2547 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2548 {
2549   Mat            mat;
2550   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2551   PetscErrorCode ierr;
2552   PetscInt       len=0;
2553 
2554   PetscFunctionBegin;
2555   *newmat       = 0;
2556   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2557   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2558   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2559 
2560   mat->factor       = matin->factor;
2561   mat->preallocated = PETSC_TRUE;
2562   mat->assembled    = PETSC_TRUE;
2563   mat->insertmode   = NOT_SET_VALUES;
2564 
2565   a      = (Mat_MPIBAIJ*)mat->data;
2566   mat->bs  = matin->bs;
2567   a->bs2   = oldmat->bs2;
2568   a->mbs   = oldmat->mbs;
2569   a->nbs   = oldmat->nbs;
2570   a->Mbs   = oldmat->Mbs;
2571   a->Nbs   = oldmat->Nbs;
2572 
2573   a->rstart       = oldmat->rstart;
2574   a->rend         = oldmat->rend;
2575   a->cstart       = oldmat->cstart;
2576   a->cend         = oldmat->cend;
2577   a->size         = oldmat->size;
2578   a->rank         = oldmat->rank;
2579   a->donotstash   = oldmat->donotstash;
2580   a->roworiented  = oldmat->roworiented;
2581   a->rowindices   = 0;
2582   a->rowvalues    = 0;
2583   a->getrowactive = PETSC_FALSE;
2584   a->barray       = 0;
2585   a->rstart_bs    = oldmat->rstart_bs;
2586   a->rend_bs      = oldmat->rend_bs;
2587   a->cstart_bs    = oldmat->cstart_bs;
2588   a->cend_bs      = oldmat->cend_bs;
2589 
2590   /* hash table stuff */
2591   a->ht           = 0;
2592   a->hd           = 0;
2593   a->ht_size      = 0;
2594   a->ht_flag      = oldmat->ht_flag;
2595   a->ht_fact      = oldmat->ht_fact;
2596   a->ht_total_ct  = 0;
2597   a->ht_insert_ct = 0;
2598 
2599   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2600   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2601   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2602   if (oldmat->colmap) {
2603 #if defined (PETSC_USE_CTABLE)
2604   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2605 #else
2606   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2607   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2608   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2609 #endif
2610   } else a->colmap = 0;
2611 
2612   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2613     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2614     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2615     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2616   } else a->garray = 0;
2617 
2618   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2619   PetscLogObjectParent(mat,a->lvec);
2620   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2621   PetscLogObjectParent(mat,a->Mvctx);
2622 
2623   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2624   PetscLogObjectParent(mat,a->A);
2625   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2626   PetscLogObjectParent(mat,a->B);
2627   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2628   *newmat = mat;
2629 
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #include "petscsys.h"
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatLoad_MPIBAIJ"
2637 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2638 {
2639   Mat            A;
2640   PetscErrorCode ierr;
2641   int            fd;
2642   PetscInt       i,nz,j,rstart,rend;
2643   PetscScalar    *vals,*buf;
2644   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2645   MPI_Status     status;
2646   PetscMPIInt    rank,size,maxnz;
2647   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2648   PetscInt       *locrowlens,*procsnz = 0,*browners;
2649   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows;
2650   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2651   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2652   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2653 
2654   PetscFunctionBegin;
2655   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2656 
2657   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2658   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2659   if (!rank) {
2660     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2661     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2662     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2663   }
2664 
2665   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2666   M = header[1]; N = header[2];
2667 
2668   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2669 
2670   /*
2671      This code adds extra rows to make sure the number of rows is
2672      divisible by the blocksize
2673   */
2674   Mbs        = M/bs;
2675   extra_rows = bs - M + bs*Mbs;
2676   if (extra_rows == bs) extra_rows = 0;
2677   else                  Mbs++;
2678   if (extra_rows && !rank) {
2679     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2680   }
2681 
2682   /* determine ownership of all rows */
2683   mbs        = Mbs/size + ((Mbs % size) > rank);
2684   m          = mbs*bs;
2685   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2686   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2687   rowners[0] = 0;
2688   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2689   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2690   rstart = rowners[rank];
2691   rend   = rowners[rank+1];
2692 
2693   /* distribute row lengths to all processors */
2694   ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2695   if (!rank) {
2696     mend = m;
2697     if (size == 1) mend = mend - extra_rows;
2698     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2699     for (j=mend; j<m; j++) locrowlens[j] = 1;
2700     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2701     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2702     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2703     for (j=0; j<m; j++) {
2704       procsnz[0] += locrowlens[j];
2705     }
2706     for (i=1; i<size; i++) {
2707       mend = browners[i+1] - browners[i];
2708       if (i == size-1) mend = mend - extra_rows;
2709       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2710       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2711       /* calculate the number of nonzeros on each processor */
2712       for (j=0; j<browners[i+1]-browners[i]; j++) {
2713         procsnz[i] += rowlengths[j];
2714       }
2715       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2716     }
2717     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2718   } else {
2719     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2720   }
2721 
2722   if (!rank) {
2723     /* determine max buffer needed and allocate it */
2724     maxnz = procsnz[0];
2725     for (i=1; i<size; i++) {
2726       maxnz = PetscMax(maxnz,procsnz[i]);
2727     }
2728     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2729 
2730     /* read in my part of the matrix column indices  */
2731     nz     = procsnz[0];
2732     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2733     mycols = ibuf;
2734     if (size == 1)  nz -= extra_rows;
2735     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2736     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2737 
2738     /* read in every ones (except the last) and ship off */
2739     for (i=1; i<size-1; i++) {
2740       nz   = procsnz[i];
2741       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2742       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2743     }
2744     /* read in the stuff for the last proc */
2745     if (size != 1) {
2746       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2747       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2748       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2749       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2750     }
2751     ierr = PetscFree(cols);CHKERRQ(ierr);
2752   } else {
2753     /* determine buffer space needed for message */
2754     nz = 0;
2755     for (i=0; i<m; i++) {
2756       nz += locrowlens[i];
2757     }
2758     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2759     mycols = ibuf;
2760     /* receive message of column indices*/
2761     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2762     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2763     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2764   }
2765 
2766   /* loop over local rows, determining number of off diagonal entries */
2767   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2768   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2769   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2770   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2771   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2772   rowcount = 0; nzcount = 0;
2773   for (i=0; i<mbs; i++) {
2774     dcount  = 0;
2775     odcount = 0;
2776     for (j=0; j<bs; j++) {
2777       kmax = locrowlens[rowcount];
2778       for (k=0; k<kmax; k++) {
2779         tmp = mycols[nzcount++]/bs;
2780         if (!mask[tmp]) {
2781           mask[tmp] = 1;
2782           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2783           else masked1[dcount++] = tmp;
2784         }
2785       }
2786       rowcount++;
2787     }
2788 
2789     dlens[i]  = dcount;
2790     odlens[i] = odcount;
2791 
2792     /* zero out the mask elements we set */
2793     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2794     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2795   }
2796 
2797   /* create our matrix */
2798   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2799   ierr = MatSetType(A,type);CHKERRQ(ierr)
2800   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2801 
2802   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2803   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2804 
2805   if (!rank) {
2806     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2807     /* read in my part of the matrix numerical values  */
2808     nz = procsnz[0];
2809     vals = buf;
2810     mycols = ibuf;
2811     if (size == 1)  nz -= extra_rows;
2812     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2813     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2814 
2815     /* insert into matrix */
2816     jj      = rstart*bs;
2817     for (i=0; i<m; i++) {
2818       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2819       mycols += locrowlens[i];
2820       vals   += locrowlens[i];
2821       jj++;
2822     }
2823     /* read in other processors (except the last one) and ship out */
2824     for (i=1; i<size-1; i++) {
2825       nz   = procsnz[i];
2826       vals = buf;
2827       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2828       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2829     }
2830     /* the last proc */
2831     if (size != 1){
2832       nz   = procsnz[i] - extra_rows;
2833       vals = buf;
2834       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2835       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2836       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2837     }
2838     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2839   } else {
2840     /* receive numeric values */
2841     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2842 
2843     /* receive message of values*/
2844     vals   = buf;
2845     mycols = ibuf;
2846     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2847     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2848     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2849 
2850     /* insert into matrix */
2851     jj      = rstart*bs;
2852     for (i=0; i<m; i++) {
2853       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2854       mycols += locrowlens[i];
2855       vals   += locrowlens[i];
2856       jj++;
2857     }
2858   }
2859   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2860   ierr = PetscFree(buf);CHKERRQ(ierr);
2861   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2862   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2863   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2864   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2865   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2866   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2867 
2868   *newmat = A;
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2874 /*@
2875    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2876 
2877    Input Parameters:
2878 .  mat  - the matrix
2879 .  fact - factor
2880 
2881    Collective on Mat
2882 
2883    Level: advanced
2884 
2885   Notes:
2886    This can also be set by the command line option: -mat_use_hash_table fact
2887 
2888 .keywords: matrix, hashtable, factor, HT
2889 
2890 .seealso: MatSetOption()
2891 @*/
2892 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2893 {
2894   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2895 
2896   PetscFunctionBegin;
2897   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2898   if (f) {
2899     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2900   }
2901   PetscFunctionReturn(0);
2902 }
2903 
2904 #undef __FUNCT__
2905 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2906 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2907 {
2908   Mat_MPIBAIJ *baij;
2909 
2910   PetscFunctionBegin;
2911   baij = (Mat_MPIBAIJ*)mat->data;
2912   baij->ht_fact = fact;
2913   PetscFunctionReturn(0);
2914 }
2915 
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2918 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2919 {
2920   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2921   PetscFunctionBegin;
2922   *Ad     = a->A;
2923   *Ao     = a->B;
2924   *colmap = a->garray;
2925   PetscFunctionReturn(0);
2926 }
2927