xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6ad4291fb3cf799a408e09ec04611269bedac2ad)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
16 /*  UGLY, ugly, ugly
17    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
18    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
19    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
20    converts the entries into single precision and then calls ..._MatScalar() to put them
21    into the single precision data structures.
22 */
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       i;
44   PetscScalar    *va,*vb;
45   Vec            vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 /*
98      Local utility routine that creates a mapping from the global column
99    number to the local number in the off-diagonal part of the local
100    storage of the matrix.  This is done in a non scable way since the
101    length of colmap equals the global matrix length.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
106 {
107   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
108   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
109   PetscErrorCode ierr;
110   PetscInt       nbs = B->nbs,i,bs=mat->bs;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
119   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
120   PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
126 
127 #define CHUNKSIZE  10
128 
129 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
130 { \
131  \
132     brow = row/bs;  \
133     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134     rmax = aimax[brow]; nrow = ailen[brow]; \
135       bcol = col/bs; \
136       ridx = row % bs; cidx = col % bs; \
137       low = 0; high = nrow; \
138       while (high-low > 3) { \
139         t = (low+high)/2; \
140         if (rp[t] > bcol) high = t; \
141         else              low  = t; \
142       } \
143       for (_i=low; _i<high; _i++) { \
144         if (rp[_i] > bcol) break; \
145         if (rp[_i] == bcol) { \
146           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147           if (addv == ADD_VALUES) *bap += value;  \
148           else                    *bap  = value;  \
149           goto a_noinsert; \
150         } \
151       } \
152       if (a->nonew == 1) goto a_noinsert; \
153       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
154       if (nrow >= rmax) { \
155         /* there is no extra room in row, therefore enlarge */ \
156         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
157         MatScalar *new_a; \
158  \
159         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
160  \
161         /* malloc new storage space */ \
162         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
163         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
164         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
165         new_i   = new_j + new_nz; \
166  \
167         /* copy over old data into new slots */ \
168         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
169         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
171         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
172         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
173         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
175         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
176                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
177         /* free up old matrix storage */ \
178         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179         if (!a->singlemalloc) { \
180           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182         } \
183         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
184         a->singlemalloc = PETSC_TRUE; \
185  \
186         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
189         a->maxnz += bs2*CHUNKSIZE; \
190         a->reallocs++; \
191         a->nz++; \
192       } \
193       N = nrow++ - 1;  \
194       /* shift up all the later entries in this row */ \
195       for (ii=N; ii>=_i; ii--) { \
196         rp[ii+1] = rp[ii]; \
197         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198       } \
199       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
200       rp[_i]                      = bcol;  \
201       ap[bs2*_i + bs*cidx + ridx] = value;  \
202       a_noinsert:; \
203     ailen[brow] = nrow; \
204 }
205 
206 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207 { \
208     brow = row/bs;  \
209     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210     rmax = bimax[brow]; nrow = bilen[brow]; \
211       bcol = col/bs; \
212       ridx = row % bs; cidx = col % bs; \
213       low = 0; high = nrow; \
214       while (high-low > 3) { \
215         t = (low+high)/2; \
216         if (rp[t] > bcol) high = t; \
217         else              low  = t; \
218       } \
219       for (_i=low; _i<high; _i++) { \
220         if (rp[_i] > bcol) break; \
221         if (rp[_i] == bcol) { \
222           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223           if (addv == ADD_VALUES) *bap += value;  \
224           else                    *bap  = value;  \
225           goto b_noinsert; \
226         } \
227       } \
228       if (b->nonew == 1) goto b_noinsert; \
229       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
230       if (nrow >= rmax) { \
231         /* there is no extra room in row, therefore enlarge */ \
232         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
233         MatScalar *new_a; \
234  \
235         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
236  \
237         /* malloc new storage space */ \
238         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
239         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
241         new_i   = new_j + new_nz; \
242  \
243         /* copy over old data into new slots */ \
244         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
245         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
247         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
249         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
252                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253         /* free up old matrix storage */ \
254         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255         if (!b->singlemalloc) { \
256           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258         } \
259         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
260         b->singlemalloc = PETSC_TRUE; \
261  \
262         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
265         b->maxnz += bs2*CHUNKSIZE; \
266         b->reallocs++; \
267         b->nz++; \
268       } \
269       N = nrow++ - 1;  \
270       /* shift up all the later entries in this row */ \
271       for (ii=N; ii>=_i; ii--) { \
272         rp[ii+1] = rp[ii]; \
273         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274       } \
275       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276       rp[_i]                      = bcol;  \
277       ap[bs2*_i + bs*cidx + ridx] = value;  \
278       b_noinsert:; \
279     bilen[brow] = nrow; \
280 }
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       i,N = m*n;
290   MatScalar      *vsingle;
291 
292   PetscFunctionBegin;
293   if (N > b->setvalueslen) {
294     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
295     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
296     b->setvalueslen  = N;
297   }
298   vsingle = b->setvaluescopy;
299 
300   for (i=0; i<N; i++) {
301     vsingle[i] = v[i];
302   }
303   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
310 {
311   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
312   PetscErrorCode ierr;
313   PetscInt       i,N = m*n*b->bs2;
314   MatScalar      *vsingle;
315 
316   PetscFunctionBegin;
317   if (N > b->setvalueslen) {
318     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
319     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
320     b->setvalueslen  = N;
321   }
322   vsingle = b->setvaluescopy;
323   for (i=0; i<N; i++) {
324     vsingle[i] = v[i];
325   }
326   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
333 {
334   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
335   PetscErrorCode ierr;
336   PetscInt       i,N = m*n;
337   MatScalar      *vsingle;
338 
339   PetscFunctionBegin;
340   if (N > b->setvalueslen) {
341     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
342     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
343     b->setvalueslen  = N;
344   }
345   vsingle = b->setvaluescopy;
346   for (i=0; i<N; i++) {
347     vsingle[i] = v[i];
348   }
349   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
356 {
357   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
358   PetscErrorCode ierr;
359   PetscInt       i,N = m*n*b->bs2;
360   MatScalar      *vsingle;
361 
362   PetscFunctionBegin;
363   if (N > b->setvalueslen) {
364     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
365     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
366     b->setvalueslen  = N;
367   }
368   vsingle = b->setvaluescopy;
369   for (i=0; i<N; i++) {
370     vsingle[i] = v[i];
371   }
372   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 #endif
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
380 {
381   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
382   MatScalar      value;
383   PetscTruth     roworiented = baij->roworiented;
384   PetscErrorCode ierr;
385   PetscInt       i,j,row,col;
386   PetscInt       rstart_orig=baij->rstart_bs;
387   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
388   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
389 
390   /* Some Variables required in the macro */
391   Mat            A = baij->A;
392   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
393   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
394   MatScalar      *aa=a->a;
395 
396   Mat            B = baij->B;
397   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
398   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
399   MatScalar      *ba=b->a;
400 
401   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
402   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
403   MatScalar      *ap,*bap;
404 
405   PetscFunctionBegin;
406   for (i=0; i<m; i++) {
407     if (im[i] < 0) continue;
408 #if defined(PETSC_USE_BOPT_g)
409     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
410 #endif
411     if (im[i] >= rstart_orig && im[i] < rend_orig) {
412       row = im[i] - rstart_orig;
413       for (j=0; j<n; j++) {
414         if (in[j] >= cstart_orig && in[j] < cend_orig){
415           col = in[j] - cstart_orig;
416           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
418           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_BOPT_g)
421         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!baij->colmap) {
426               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430             col  = col - 1;
431 #else
432             col = baij->colmap[in[j]/bs] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
435               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
438               B = baij->B;
439               b = (Mat_SeqBAIJ*)(B)->data;
440               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
441               ba=b->a;
442             } else col += in[j]%bs;
443           } else col = in[j];
444           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
445           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
446           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
447         }
448       }
449     } else {
450       if (!baij->donotstash) {
451         if (roworiented) {
452           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453         } else {
454           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
455         }
456       }
457     }
458   }
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
467   const MatScalar *value;
468   MatScalar       *barray=baij->barray;
469   PetscTruth      roworiented = baij->roworiented;
470   PetscErrorCode  ierr;
471   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
472   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
473   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
474 
475   PetscFunctionBegin;
476   if(!barray) {
477     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
478     baij->barray = barray;
479   }
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487     if (im[i] < 0) continue;
488 #if defined(PETSC_USE_BOPT_g)
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     if (im[i] >= rstart && im[i] < rend) {
492       row = im[i] - rstart;
493       for (j=0; j<n; j++) {
494         /* If NumCol = 1 then a copy is not required */
495         if ((roworiented) && (n == 1)) {
496           barray = (MatScalar*)v + i*bs2;
497         } else if((!roworiented) && (m == 1)) {
498           barray = (MatScalar*)v + j*bs2;
499         } else { /* Here a copy is required */
500           if (roworiented) {
501             value = v + i*(stepval+bs)*bs + j*bs;
502           } else {
503             value = v + j*(stepval+bs)*bs + i*bs;
504           }
505           for (ii=0; ii<bs; ii++,value+=stepval) {
506             for (jj=0; jj<bs; jj++) {
507               *barray++  = *value++;
508             }
509           }
510           barray -=bs2;
511         }
512 
513         if (in[j] >= cstart && in[j] < cend){
514           col  = in[j] - cstart;
515           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
516         }
517         else if (in[j] < 0) continue;
518 #if defined(PETSC_USE_BOPT_g)
519         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
520 #endif
521         else {
522           if (mat->was_assembled) {
523             if (!baij->colmap) {
524               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
525             }
526 
527 #if defined(PETSC_USE_BOPT_g)
528 #if defined (PETSC_USE_CTABLE)
529             { PetscInt data;
530               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
531               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
532             }
533 #else
534             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
535 #endif
536 #endif
537 #if defined (PETSC_USE_CTABLE)
538 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
539             col  = (col - 1)/bs;
540 #else
541             col = (baij->colmap[in[j]] - 1)/bs;
542 #endif
543             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
544               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
545               col =  in[j];
546             }
547           }
548           else col = in[j];
549           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
550         }
551       }
552     } else {
553       if (!baij->donotstash) {
554         if (roworiented) {
555           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
556         } else {
557           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
558         }
559       }
560     }
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 #define HASH_KEY 0.6180339887
566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
569 #undef __FUNCT__
570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
572 {
573   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
574   PetscTruth     roworiented = baij->roworiented;
575   PetscErrorCode ierr;
576   PetscInt       i,j,row,col;
577   PetscInt       rstart_orig=baij->rstart_bs;
578   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
579   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
580   PetscReal      tmp;
581   MatScalar      **HD = baij->hd,value;
582 #if defined(PETSC_USE_BOPT_g)
583   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
584 #endif
585 
586   PetscFunctionBegin;
587 
588   for (i=0; i<m; i++) {
589 #if defined(PETSC_USE_BOPT_g)
590     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
591     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
592 #endif
593       row = im[i];
594     if (row >= rstart_orig && row < rend_orig) {
595       for (j=0; j<n; j++) {
596         col = in[j];
597         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
598         /* Look up PetscInto the Hash Table */
599         key = (row/bs)*Nbs+(col/bs)+1;
600         h1  = HASH(size,key,tmp);
601 
602 
603         idx = h1;
604 #if defined(PETSC_USE_BOPT_g)
605         insert_ct++;
606         total_ct++;
607         if (HT[idx] != key) {
608           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
609           if (idx == size) {
610             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
611             if (idx == h1) {
612               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
613             }
614           }
615         }
616 #else
617         if (HT[idx] != key) {
618           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
619           if (idx == size) {
620             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
621             if (idx == h1) {
622               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
623             }
624           }
625         }
626 #endif
627         /* A HASH table entry is found, so insert the values at the correct address */
628         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
629         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
630       }
631     } else {
632       if (!baij->donotstash) {
633         if (roworiented) {
634           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
635         } else {
636           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
637         }
638       }
639     }
640   }
641 #if defined(PETSC_USE_BOPT_g)
642   baij->ht_total_ct = total_ct;
643   baij->ht_insert_ct = insert_ct;
644 #endif
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
651 {
652   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
653   PetscTruth      roworiented = baij->roworiented;
654   PetscErrorCode  ierr;
655   PetscInt        i,j,ii,jj,row,col;
656   PetscInt        rstart=baij->rstart ;
657   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
658   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
659   PetscReal       tmp;
660   MatScalar       **HD = baij->hd,*baij_a;
661   const MatScalar *v_t,*value;
662 #if defined(PETSC_USE_BOPT_g)
663   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
664 #endif
665 
666   PetscFunctionBegin;
667 
668   if (roworiented) {
669     stepval = (n-1)*bs;
670   } else {
671     stepval = (m-1)*bs;
672   }
673   for (i=0; i<m; i++) {
674 #if defined(PETSC_USE_BOPT_g)
675     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
676     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
677 #endif
678     row   = im[i];
679     v_t   = v + i*bs2;
680     if (row >= rstart && row < rend) {
681       for (j=0; j<n; j++) {
682         col = in[j];
683 
684         /* Look up into the Hash Table */
685         key = row*Nbs+col+1;
686         h1  = HASH(size,key,tmp);
687 
688         idx = h1;
689 #if defined(PETSC_USE_BOPT_g)
690         total_ct++;
691         insert_ct++;
692        if (HT[idx] != key) {
693           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
694           if (idx == size) {
695             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
696             if (idx == h1) {
697               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
698             }
699           }
700         }
701 #else
702         if (HT[idx] != key) {
703           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
704           if (idx == size) {
705             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
706             if (idx == h1) {
707               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
708             }
709           }
710         }
711 #endif
712         baij_a = HD[idx];
713         if (roworiented) {
714           /*value = v + i*(stepval+bs)*bs + j*bs;*/
715           /* value = v + (i*(stepval+bs)+j)*bs; */
716           value = v_t;
717           v_t  += bs;
718           if (addv == ADD_VALUES) {
719             for (ii=0; ii<bs; ii++,value+=stepval) {
720               for (jj=ii; jj<bs2; jj+=bs) {
721                 baij_a[jj]  += *value++;
722               }
723             }
724           } else {
725             for (ii=0; ii<bs; ii++,value+=stepval) {
726               for (jj=ii; jj<bs2; jj+=bs) {
727                 baij_a[jj]  = *value++;
728               }
729             }
730           }
731         } else {
732           value = v + j*(stepval+bs)*bs + i*bs;
733           if (addv == ADD_VALUES) {
734             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
735               for (jj=0; jj<bs; jj++) {
736                 baij_a[jj]  += *value++;
737               }
738             }
739           } else {
740             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
741               for (jj=0; jj<bs; jj++) {
742                 baij_a[jj]  = *value++;
743               }
744             }
745           }
746         }
747       }
748     } else {
749       if (!baij->donotstash) {
750         if (roworiented) {
751           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
752         } else {
753           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
754         }
755       }
756     }
757   }
758 #if defined(PETSC_USE_BOPT_g)
759   baij->ht_total_ct = total_ct;
760   baij->ht_insert_ct = insert_ct;
761 #endif
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "MatGetValues_MPIBAIJ"
767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
768 {
769   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
770   PetscErrorCode ierr;
771   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
772   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
773 
774   PetscFunctionBegin;
775   for (i=0; i<m; i++) {
776     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
777     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
778     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
779       row = idxm[i] - bsrstart;
780       for (j=0; j<n; j++) {
781         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
782         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
783         if (idxn[j] >= bscstart && idxn[j] < bscend){
784           col = idxn[j] - bscstart;
785           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
786         } else {
787           if (!baij->colmap) {
788             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
789           }
790 #if defined (PETSC_USE_CTABLE)
791           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
792           data --;
793 #else
794           data = baij->colmap[idxn[j]/bs]-1;
795 #endif
796           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
797           else {
798             col  = data + idxn[j]%bs;
799             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
800           }
801         }
802       }
803     } else {
804       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
805     }
806   }
807  PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatNorm_MPIBAIJ"
812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813 {
814   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
815   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816   PetscErrorCode ierr;
817   PetscInt       i,bs2=baij->bs2;
818   PetscReal      sum = 0.0;
819   MatScalar      *v;
820 
821   PetscFunctionBegin;
822   if (baij->size == 1) {
823     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824   } else {
825     if (type == NORM_FROBENIUS) {
826       v = amat->a;
827       for (i=0; i<amat->nz*bs2; i++) {
828 #if defined(PETSC_USE_COMPLEX)
829         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
830 #else
831         sum += (*v)*(*v); v++;
832 #endif
833       }
834       v = bmat->a;
835       for (i=0; i<bmat->nz*bs2; i++) {
836 #if defined(PETSC_USE_COMPLEX)
837         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
838 #else
839         sum += (*v)*(*v); v++;
840 #endif
841       }
842       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
843       *nrm = sqrt(*nrm);
844     } else {
845       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
846     }
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 
852 /*
853   Creates the hash table, and sets the table
854   This table is created only once.
855   If new entried need to be added to the matrix
856   then the hash table has to be destroyed and
857   recreated.
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
862 {
863   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
864   Mat            A = baij->A,B=baij->B;
865   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
866   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
867   PetscErrorCode ierr;
868   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
869   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
870   PetscInt       *HT,key;
871   MatScalar      **HD;
872   PetscReal      tmp;
873 #if defined(PETSC_USE_BOPT_g)
874   PetscInt       ct=0,max=0;
875 #endif
876 
877   PetscFunctionBegin;
878   baij->ht_size=(PetscInt)(factor*nz);
879   size = baij->ht_size;
880 
881   if (baij->ht) {
882     PetscFunctionReturn(0);
883   }
884 
885   /* Allocate Memory for Hash Table */
886   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
887   baij->ht = (PetscInt*)(baij->hd + size);
888   HD       = baij->hd;
889   HT       = baij->ht;
890 
891 
892   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
893 
894 
895   /* Loop Over A */
896   for (i=0; i<a->mbs; i++) {
897     for (j=ai[i]; j<ai[i+1]; j++) {
898       row = i+rstart;
899       col = aj[j]+cstart;
900 
901       key = row*Nbs + col + 1;
902       h1  = HASH(size,key,tmp);
903       for (k=0; k<size; k++){
904         if (!HT[(h1+k)%size]) {
905           HT[(h1+k)%size] = key;
906           HD[(h1+k)%size] = a->a + j*bs2;
907           break;
908 #if defined(PETSC_USE_BOPT_g)
909         } else {
910           ct++;
911 #endif
912         }
913       }
914 #if defined(PETSC_USE_BOPT_g)
915       if (k> max) max = k;
916 #endif
917     }
918   }
919   /* Loop Over B */
920   for (i=0; i<b->mbs; i++) {
921     for (j=bi[i]; j<bi[i+1]; j++) {
922       row = i+rstart;
923       col = garray[bj[j]];
924       key = row*Nbs + col + 1;
925       h1  = HASH(size,key,tmp);
926       for (k=0; k<size; k++){
927         if (!HT[(h1+k)%size]) {
928           HT[(h1+k)%size] = key;
929           HD[(h1+k)%size] = b->a + j*bs2;
930           break;
931 #if defined(PETSC_USE_BOPT_g)
932         } else {
933           ct++;
934 #endif
935         }
936       }
937 #if defined(PETSC_USE_BOPT_g)
938       if (k> max) max = k;
939 #endif
940     }
941   }
942 
943   /* Print Summary */
944 #if defined(PETSC_USE_BOPT_g)
945   for (i=0,j=0; i<size; i++) {
946     if (HT[i]) {j++;}
947   }
948   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
949 #endif
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
956 {
957   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
958   PetscErrorCode ierr;
959   PetscInt       nstash,reallocs;
960   InsertMode     addv;
961 
962   PetscFunctionBegin;
963   if (baij->donotstash) {
964     PetscFunctionReturn(0);
965   }
966 
967   /* make sure all processors are either in INSERTMODE or ADDMODE */
968   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
969   if (addv == (ADD_VALUES|INSERT_VALUES)) {
970     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
971   }
972   mat->insertmode = addv; /* in case this processor had no cache */
973 
974   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
975   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
976   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
977   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
978   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
979   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
985 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
986 {
987   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
988   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
989   PetscErrorCode ierr;
990   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
991   PetscInt       *row,*col,other_disassembled;
992   PetscTruth     r1,r2,r3;
993   MatScalar      *val;
994   InsertMode     addv = mat->insertmode;
995   PetscMPIInt    n;
996 
997   PetscFunctionBegin;
998   if (!baij->donotstash) {
999     while (1) {
1000       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1001       if (!flg) break;
1002 
1003       for (i=0; i<n;) {
1004         /* Now identify the consecutive vals belonging to the same row */
1005         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1006         if (j < n) ncols = j-i;
1007         else       ncols = n-i;
1008         /* Now assemble all these values with a single function call */
1009         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1010         i = j;
1011       }
1012     }
1013     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1014     /* Now process the block-stash. Since the values are stashed column-oriented,
1015        set the roworiented flag to column oriented, and after MatSetValues()
1016        restore the original flags */
1017     r1 = baij->roworiented;
1018     r2 = a->roworiented;
1019     r3 = b->roworiented;
1020     baij->roworiented = PETSC_FALSE;
1021     a->roworiented    = PETSC_FALSE;
1022     b->roworiented    = PETSC_FALSE;
1023     while (1) {
1024       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1025       if (!flg) break;
1026 
1027       for (i=0; i<n;) {
1028         /* Now identify the consecutive vals belonging to the same row */
1029         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1030         if (j < n) ncols = j-i;
1031         else       ncols = n-i;
1032         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1033         i = j;
1034       }
1035     }
1036     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1037     baij->roworiented = r1;
1038     a->roworiented    = r2;
1039     b->roworiented    = r3;
1040   }
1041 
1042   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1043   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1044 
1045   /* determine if any processor has disassembled, if so we must
1046      also disassemble ourselfs, in order that we may reassemble. */
1047   /*
1048      if nonzero structure of submatrix B cannot change then we know that
1049      no processor disassembled thus we can skip this stuff
1050   */
1051   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1052     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1053     if (mat->was_assembled && !other_disassembled) {
1054       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1055     }
1056   }
1057 
1058   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1059     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1060   }
1061   b->compressedrow.use = PETSC_TRUE;
1062   b->compressedrow.checked = PETSC_FALSE;
1063   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1064   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1065 
1066 #if defined(PETSC_USE_BOPT_g)
1067   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1068     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1069     baij->ht_total_ct  = 0;
1070     baij->ht_insert_ct = 0;
1071   }
1072 #endif
1073   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1074     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1075     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1076     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1077   }
1078 
1079   if (baij->rowvalues) {
1080     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1081     baij->rowvalues = 0;
1082   }
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1088 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1089 {
1090   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1091   PetscErrorCode    ierr;
1092   PetscMPIInt       size = baij->size,rank = baij->rank;
1093   PetscInt          bs = mat->bs;
1094   PetscTruth        iascii,isdraw;
1095   PetscViewer       sviewer;
1096   PetscViewerFormat format;
1097 
1098   PetscFunctionBegin;
1099   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1100   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1101   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1102   if (iascii) {
1103     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1104     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1105       MatInfo info;
1106       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1107       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1108       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1109               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1110               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1111       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1112       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1113       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1114       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1115       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1116       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1117       PetscFunctionReturn(0);
1118     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1119       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1120       PetscFunctionReturn(0);
1121     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1122       PetscFunctionReturn(0);
1123     }
1124   }
1125 
1126   if (isdraw) {
1127     PetscDraw       draw;
1128     PetscTruth isnull;
1129     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1130     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1131   }
1132 
1133   if (size == 1) {
1134     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1135     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1136   } else {
1137     /* assemble the entire matrix onto first processor. */
1138     Mat         A;
1139     Mat_SeqBAIJ *Aloc;
1140     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1141     MatScalar   *a;
1142 
1143     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1144     /* Perhaps this should be the type of mat? */
1145     if (!rank) {
1146       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1147     } else {
1148       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
1149     }
1150     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1151     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1152     PetscLogObjectParent(mat,A);
1153 
1154     /* copy over the A part */
1155     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1156     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1157     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1158 
1159     for (i=0; i<mbs; i++) {
1160       rvals[0] = bs*(baij->rstart + i);
1161       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1162       for (j=ai[i]; j<ai[i+1]; j++) {
1163         col = (baij->cstart+aj[j])*bs;
1164         for (k=0; k<bs; k++) {
1165           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1166           col++; a += bs;
1167         }
1168       }
1169     }
1170     /* copy over the B part */
1171     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1172     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1173     for (i=0; i<mbs; i++) {
1174       rvals[0] = bs*(baij->rstart + i);
1175       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1176       for (j=ai[i]; j<ai[i+1]; j++) {
1177         col = baij->garray[aj[j]]*bs;
1178         for (k=0; k<bs; k++) {
1179           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1180           col++; a += bs;
1181         }
1182       }
1183     }
1184     ierr = PetscFree(rvals);CHKERRQ(ierr);
1185     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1186     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1187     /*
1188        Everyone has to call to draw the matrix since the graphics waits are
1189        synchronized across all processors that share the PetscDraw object
1190     */
1191     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1192     if (!rank) {
1193       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1194       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1195     }
1196     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1197     ierr = MatDestroy(A);CHKERRQ(ierr);
1198   }
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatView_MPIBAIJ"
1204 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1205 {
1206   PetscErrorCode ierr;
1207   PetscTruth     iascii,isdraw,issocket,isbinary;
1208 
1209   PetscFunctionBegin;
1210   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1211   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1212   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1213   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1214   if (iascii || isdraw || issocket || isbinary) {
1215     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1216   } else {
1217     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1218   }
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 #undef __FUNCT__
1223 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1224 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1225 {
1226   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1227   PetscErrorCode ierr;
1228 
1229   PetscFunctionBegin;
1230 #if defined(PETSC_USE_LOG)
1231   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
1232 #endif
1233   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1234   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1235   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1236   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1237   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1238 #if defined (PETSC_USE_CTABLE)
1239   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1240 #else
1241   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1242 #endif
1243   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1244   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1245   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1246   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1247   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1248   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1249 #if defined(PETSC_USE_MAT_SINGLE)
1250   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1251 #endif
1252   ierr = PetscFree(baij);CHKERRQ(ierr);
1253 
1254   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1255   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1256   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1257   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1258   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1259   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1260   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatMult_MPIBAIJ"
1266 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1267 {
1268   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1269   PetscErrorCode ierr;
1270   PetscInt       nt;
1271 
1272   PetscFunctionBegin;
1273   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1274   if (nt != A->n) {
1275     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1276   }
1277   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1278   if (nt != A->m) {
1279     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1280   }
1281   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1282   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1283   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1284   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1285   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1291 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1292 {
1293   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1294   PetscErrorCode ierr;
1295 
1296   PetscFunctionBegin;
1297   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1298   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1299   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1300   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1306 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1307 {
1308   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310 
1311   PetscFunctionBegin;
1312   /* do nondiagonal part */
1313   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1314   /* send it on its way */
1315   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1316   /* do local part */
1317   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1318   /* receive remote parts: note this assumes the values are not actually */
1319   /* inserted in yy until the next line, which is true for my implementation*/
1320   /* but is not perhaps always true. */
1321   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1327 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   /* do nondiagonal part */
1334   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1335   /* send it on its way */
1336   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1337   /* do local part */
1338   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1339   /* receive remote parts: note this assumes the values are not actually */
1340   /* inserted in yy until the next line, which is true for my implementation*/
1341   /* but is not perhaps always true. */
1342   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 /*
1347   This only works correctly for square matrices where the subblock A->A is the
1348    diagonal block
1349 */
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1352 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1353 {
1354   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1355   PetscErrorCode ierr;
1356 
1357   PetscFunctionBegin;
1358   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1359   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatScale_MPIBAIJ"
1365 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1366 {
1367   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1368   PetscErrorCode ierr;
1369 
1370   PetscFunctionBegin;
1371   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1372   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1378 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1379 {
1380   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1381   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1382   PetscErrorCode ierr;
1383   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1384   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1385   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1386 
1387   PetscFunctionBegin;
1388   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1389   mat->getrowactive = PETSC_TRUE;
1390 
1391   if (!mat->rowvalues && (idx || v)) {
1392     /*
1393         allocate enough space to hold information from the longest row.
1394     */
1395     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1396     PetscInt     max = 1,mbs = mat->mbs,tmp;
1397     for (i=0; i<mbs; i++) {
1398       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1399       if (max < tmp) { max = tmp; }
1400     }
1401     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1402     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1403   }
1404 
1405   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1406   lrow = row - brstart;
1407 
1408   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1409   if (!v)   {pvA = 0; pvB = 0;}
1410   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1411   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1412   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1413   nztot = nzA + nzB;
1414 
1415   cmap  = mat->garray;
1416   if (v  || idx) {
1417     if (nztot) {
1418       /* Sort by increasing column numbers, assuming A and B already sorted */
1419       PetscInt imark = -1;
1420       if (v) {
1421         *v = v_p = mat->rowvalues;
1422         for (i=0; i<nzB; i++) {
1423           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1424           else break;
1425         }
1426         imark = i;
1427         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1428         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1429       }
1430       if (idx) {
1431         *idx = idx_p = mat->rowindices;
1432         if (imark > -1) {
1433           for (i=0; i<imark; i++) {
1434             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1435           }
1436         } else {
1437           for (i=0; i<nzB; i++) {
1438             if (cmap[cworkB[i]/bs] < cstart)
1439               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1440             else break;
1441           }
1442           imark = i;
1443         }
1444         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1445         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1446       }
1447     } else {
1448       if (idx) *idx = 0;
1449       if (v)   *v   = 0;
1450     }
1451   }
1452   *nz = nztot;
1453   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1454   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1460 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1461 {
1462   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1463 
1464   PetscFunctionBegin;
1465   if (baij->getrowactive == PETSC_FALSE) {
1466     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1467   }
1468   baij->getrowactive = PETSC_FALSE;
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1474 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1475 {
1476   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1477   PetscErrorCode ierr;
1478 
1479   PetscFunctionBegin;
1480   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1481   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1487 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1488 {
1489   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1490   Mat            A = a->A,B = a->B;
1491   PetscErrorCode ierr;
1492   PetscReal      isend[5],irecv[5];
1493 
1494   PetscFunctionBegin;
1495   info->block_size     = (PetscReal)matin->bs;
1496   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1497   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1498   isend[3] = info->memory;  isend[4] = info->mallocs;
1499   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1500   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1501   isend[3] += info->memory;  isend[4] += info->mallocs;
1502   if (flag == MAT_LOCAL) {
1503     info->nz_used      = isend[0];
1504     info->nz_allocated = isend[1];
1505     info->nz_unneeded  = isend[2];
1506     info->memory       = isend[3];
1507     info->mallocs      = isend[4];
1508   } else if (flag == MAT_GLOBAL_MAX) {
1509     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1510     info->nz_used      = irecv[0];
1511     info->nz_allocated = irecv[1];
1512     info->nz_unneeded  = irecv[2];
1513     info->memory       = irecv[3];
1514     info->mallocs      = irecv[4];
1515   } else if (flag == MAT_GLOBAL_SUM) {
1516     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1517     info->nz_used      = irecv[0];
1518     info->nz_allocated = irecv[1];
1519     info->nz_unneeded  = irecv[2];
1520     info->memory       = irecv[3];
1521     info->mallocs      = irecv[4];
1522   } else {
1523     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1524   }
1525   info->rows_global       = (PetscReal)A->M;
1526   info->columns_global    = (PetscReal)A->N;
1527   info->rows_local        = (PetscReal)A->m;
1528   info->columns_local     = (PetscReal)A->N;
1529   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1530   info->fill_ratio_needed = 0;
1531   info->factor_mallocs    = 0;
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1537 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1538 {
1539   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   switch (op) {
1544   case MAT_NO_NEW_NONZERO_LOCATIONS:
1545   case MAT_YES_NEW_NONZERO_LOCATIONS:
1546   case MAT_COLUMNS_UNSORTED:
1547   case MAT_COLUMNS_SORTED:
1548   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1549   case MAT_KEEP_ZEROED_ROWS:
1550   case MAT_NEW_NONZERO_LOCATION_ERR:
1551     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1552     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1553     break;
1554   case MAT_ROW_ORIENTED:
1555     a->roworiented = PETSC_TRUE;
1556     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1557     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1558     break;
1559   case MAT_ROWS_SORTED:
1560   case MAT_ROWS_UNSORTED:
1561   case MAT_YES_NEW_DIAGONALS:
1562     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1563     break;
1564   case MAT_COLUMN_ORIENTED:
1565     a->roworiented = PETSC_FALSE;
1566     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1567     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1568     break;
1569   case MAT_IGNORE_OFF_PROC_ENTRIES:
1570     a->donotstash = PETSC_TRUE;
1571     break;
1572   case MAT_NO_NEW_DIAGONALS:
1573     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1574   case MAT_USE_HASH_TABLE:
1575     a->ht_flag = PETSC_TRUE;
1576     break;
1577   case MAT_SYMMETRIC:
1578   case MAT_STRUCTURALLY_SYMMETRIC:
1579   case MAT_HERMITIAN:
1580   case MAT_SYMMETRY_ETERNAL:
1581     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1582     break;
1583   case MAT_NOT_SYMMETRIC:
1584   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1585   case MAT_NOT_HERMITIAN:
1586   case MAT_NOT_SYMMETRY_ETERNAL:
1587     break;
1588   default:
1589     SETERRQ(PETSC_ERR_SUP,"unknown option");
1590   }
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1596 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1597 {
1598   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1599   Mat_SeqBAIJ    *Aloc;
1600   Mat            B;
1601   PetscErrorCode ierr;
1602   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1603   PetscInt       bs=A->bs,mbs=baij->mbs;
1604   MatScalar      *a;
1605 
1606   PetscFunctionBegin;
1607   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1608   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1609   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1610   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1611 
1612   /* copy over the A part */
1613   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1614   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1615   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1616 
1617   for (i=0; i<mbs; i++) {
1618     rvals[0] = bs*(baij->rstart + i);
1619     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1620     for (j=ai[i]; j<ai[i+1]; j++) {
1621       col = (baij->cstart+aj[j])*bs;
1622       for (k=0; k<bs; k++) {
1623         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1624         col++; a += bs;
1625       }
1626     }
1627   }
1628   /* copy over the B part */
1629   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1630   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1631   for (i=0; i<mbs; i++) {
1632     rvals[0] = bs*(baij->rstart + i);
1633     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1634     for (j=ai[i]; j<ai[i+1]; j++) {
1635       col = baij->garray[aj[j]]*bs;
1636       for (k=0; k<bs; k++) {
1637         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1638         col++; a += bs;
1639       }
1640     }
1641   }
1642   ierr = PetscFree(rvals);CHKERRQ(ierr);
1643   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1645 
1646   if (matout) {
1647     *matout = B;
1648   } else {
1649     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1650   }
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1656 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1657 {
1658   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1659   Mat            a = baij->A,b = baij->B;
1660   PetscErrorCode ierr;
1661   PetscInt       s1,s2,s3;
1662 
1663   PetscFunctionBegin;
1664   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1665   if (rr) {
1666     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1667     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1668     /* Overlap communication with computation. */
1669     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1670   }
1671   if (ll) {
1672     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1673     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1674     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1675   }
1676   /* scale  the diagonal block */
1677   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1678 
1679   if (rr) {
1680     /* Do a scatter end and then right scale the off-diagonal block */
1681     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1682     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1683   }
1684 
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1690 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1691 {
1692   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1693   PetscErrorCode ierr;
1694   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1695   PetscInt       i,N,*rows,*owners = l->rowners;
1696   PetscInt       *nprocs,j,idx,nsends,row;
1697   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1698   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
1699   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1700   MPI_Comm       comm = A->comm;
1701   MPI_Request    *send_waits,*recv_waits;
1702   MPI_Status     recv_status,*send_status;
1703   IS             istmp;
1704   PetscTruth     found;
1705 
1706   PetscFunctionBegin;
1707   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1708   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1709 
1710   /*  first count number of contributors to each processor */
1711   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1712   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1713   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1714   for (i=0; i<N; i++) {
1715     idx   = rows[i];
1716     found = PETSC_FALSE;
1717     for (j=0; j<size; j++) {
1718       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1719         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1720       }
1721     }
1722     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1723   }
1724   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1725 
1726   /* inform other processors of number of messages and max length*/
1727   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1728 
1729   /* post receives:   */
1730   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1731   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1732   for (i=0; i<nrecvs; i++) {
1733     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1734   }
1735 
1736   /* do sends:
1737      1) starts[i] gives the starting index in svalues for stuff going to
1738      the ith processor
1739   */
1740   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1741   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1742   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1743   starts[0]  = 0;
1744   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1745   for (i=0; i<N; i++) {
1746     svalues[starts[owner[i]]++] = rows[i];
1747   }
1748   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1749 
1750   starts[0] = 0;
1751   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1752   count = 0;
1753   for (i=0; i<size; i++) {
1754     if (nprocs[2*i+1]) {
1755       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1756     }
1757   }
1758   ierr = PetscFree(starts);CHKERRQ(ierr);
1759 
1760   base = owners[rank]*bs;
1761 
1762   /*  wait on receives */
1763   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1764   source = lens + nrecvs;
1765   count  = nrecvs; slen = 0;
1766   while (count) {
1767     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1768     /* unpack receives into our local space */
1769     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1770     source[imdex]  = recv_status.MPI_SOURCE;
1771     lens[imdex]    = n;
1772     slen          += n;
1773     count--;
1774   }
1775   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1776 
1777   /* move the data into the send scatter */
1778   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1779   count = 0;
1780   for (i=0; i<nrecvs; i++) {
1781     values = rvalues + i*nmax;
1782     for (j=0; j<lens[i]; j++) {
1783       lrows[count++] = values[j] - base;
1784     }
1785   }
1786   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1787   ierr = PetscFree(lens);CHKERRQ(ierr);
1788   ierr = PetscFree(owner);CHKERRQ(ierr);
1789   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1790 
1791   /* actually zap the local rows */
1792   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1793   PetscLogObjectParent(A,istmp);
1794 
1795   /*
1796         Zero the required rows. If the "diagonal block" of the matrix
1797      is square and the user wishes to set the diagonal we use seperate
1798      code so that MatSetValues() is not called for each diagonal allocating
1799      new memory, thus calling lots of mallocs and slowing things down.
1800 
1801        Contributed by: Mathew Knepley
1802   */
1803   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1804   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1805   if (diag && (l->A->M == l->A->N)) {
1806     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1807   } else if (diag) {
1808     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1809     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1810       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1811 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1812     }
1813     for (i=0; i<slen; i++) {
1814       row  = lrows[i] + rstart_bs;
1815       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1816     }
1817     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1818     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1819   } else {
1820     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1821   }
1822 
1823   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1824   ierr = PetscFree(lrows);CHKERRQ(ierr);
1825 
1826   /* wait on sends */
1827   if (nsends) {
1828     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1829     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1830     ierr = PetscFree(send_status);CHKERRQ(ierr);
1831   }
1832   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1833   ierr = PetscFree(svalues);CHKERRQ(ierr);
1834 
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1840 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1841 {
1842   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1843   MPI_Comm          comm = A->comm;
1844   static PetscTruth called = PETSC_FALSE;
1845   PetscErrorCode    ierr;
1846 
1847   PetscFunctionBegin;
1848   if (!a->rank) {
1849     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1850   }
1851   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1852   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1853   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1859 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1860 {
1861   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatEqual_MPIBAIJ"
1873 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1874 {
1875   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1876   Mat            a,b,c,d;
1877   PetscTruth     flg;
1878   PetscErrorCode ierr;
1879 
1880   PetscFunctionBegin;
1881   a = matA->A; b = matA->B;
1882   c = matB->A; d = matB->B;
1883 
1884   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1885   if (flg == PETSC_TRUE) {
1886     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1887   }
1888   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1895 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1896 {
1897   PetscErrorCode ierr;
1898 
1899   PetscFunctionBegin;
1900   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 /* -------------------------------------------------------------------*/
1905 static struct _MatOps MatOps_Values = {
1906        MatSetValues_MPIBAIJ,
1907        MatGetRow_MPIBAIJ,
1908        MatRestoreRow_MPIBAIJ,
1909        MatMult_MPIBAIJ,
1910 /* 4*/ MatMultAdd_MPIBAIJ,
1911        MatMultTranspose_MPIBAIJ,
1912        MatMultTransposeAdd_MPIBAIJ,
1913        0,
1914        0,
1915        0,
1916 /*10*/ 0,
1917        0,
1918        0,
1919        0,
1920        MatTranspose_MPIBAIJ,
1921 /*15*/ MatGetInfo_MPIBAIJ,
1922        MatEqual_MPIBAIJ,
1923        MatGetDiagonal_MPIBAIJ,
1924        MatDiagonalScale_MPIBAIJ,
1925        MatNorm_MPIBAIJ,
1926 /*20*/ MatAssemblyBegin_MPIBAIJ,
1927        MatAssemblyEnd_MPIBAIJ,
1928        0,
1929        MatSetOption_MPIBAIJ,
1930        MatZeroEntries_MPIBAIJ,
1931 /*25*/ MatZeroRows_MPIBAIJ,
1932        0,
1933        0,
1934        0,
1935        0,
1936 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1937        0,
1938        0,
1939        0,
1940        0,
1941 /*35*/ MatDuplicate_MPIBAIJ,
1942        0,
1943        0,
1944        0,
1945        0,
1946 /*40*/ 0,
1947        MatGetSubMatrices_MPIBAIJ,
1948        MatIncreaseOverlap_MPIBAIJ,
1949        MatGetValues_MPIBAIJ,
1950        0,
1951 /*45*/ MatPrintHelp_MPIBAIJ,
1952        MatScale_MPIBAIJ,
1953        0,
1954        0,
1955        0,
1956 /*50*/ 0,
1957        0,
1958        0,
1959        0,
1960        0,
1961 /*55*/ 0,
1962        0,
1963        MatSetUnfactored_MPIBAIJ,
1964        0,
1965        MatSetValuesBlocked_MPIBAIJ,
1966 /*60*/ 0,
1967        MatDestroy_MPIBAIJ,
1968        MatView_MPIBAIJ,
1969        MatGetPetscMaps_Petsc,
1970        0,
1971 /*65*/ 0,
1972        0,
1973        0,
1974        0,
1975        0,
1976 /*70*/ MatGetRowMax_MPIBAIJ,
1977        0,
1978        0,
1979        0,
1980        0,
1981 /*75*/ 0,
1982        0,
1983        0,
1984        0,
1985        0,
1986 /*80*/ 0,
1987        0,
1988        0,
1989        0,
1990        MatLoad_MPIBAIJ,
1991 /*85*/ 0,
1992        0,
1993        0,
1994        0,
1995        0,
1996 /*90*/ 0,
1997        0,
1998        0,
1999        0,
2000        0,
2001 /*95*/ 0,
2002        0,
2003        0,
2004        0};
2005 
2006 
2007 EXTERN_C_BEGIN
2008 #undef __FUNCT__
2009 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2010 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2011 {
2012   PetscFunctionBegin;
2013   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2014   *iscopy = PETSC_FALSE;
2015   PetscFunctionReturn(0);
2016 }
2017 EXTERN_C_END
2018 
2019 EXTERN_C_BEGIN
2020 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2021 EXTERN_C_END
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2025 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2026 {
2027   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2028   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2029   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2030   const PetscInt *JJ;
2031   PetscScalar    *values;
2032   PetscErrorCode ierr;
2033 
2034   PetscFunctionBegin;
2035 #if defined(PETSC_OPT_g)
2036   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2037 #endif
2038   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2039   o_nnz = d_nnz + m;
2040 
2041   for (i=0; i<m; i++) {
2042     nnz     = I[i+1]- I[i];
2043     JJ      = J + I[i];
2044     nnz_max = PetscMax(nnz_max,nnz);
2045 #if defined(PETSC_OPT_g)
2046     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2047 #endif
2048     for (j=0; j<nnz; j++) {
2049       if (*JJ >= cstart) break;
2050       JJ++;
2051     }
2052     d = 0;
2053     for (; j<nnz; j++) {
2054       if (*JJ++ >= cend) break;
2055       d++;
2056     }
2057     d_nnz[i] = d;
2058     o_nnz[i] = nnz - d;
2059   }
2060   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2061   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2062 
2063   if (v) values = (PetscScalar*)v;
2064   else {
2065     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2066     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2067   }
2068 
2069   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2070   for (i=0; i<m; i++) {
2071     ii   = i + rstart;
2072     nnz  = I[i+1]- I[i];
2073     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2074   }
2075   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2077   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2078 
2079   if (!v) {
2080     ierr = PetscFree(values);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2087 /*@C
2088    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2089    (the default parallel PETSc format).
2090 
2091    Collective on MPI_Comm
2092 
2093    Input Parameters:
2094 +  A - the matrix
2095 .  i - the indices into j for the start of each local row (starts with zero)
2096 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2097 -  v - optional values in the matrix
2098 
2099    Level: developer
2100 
2101 .keywords: matrix, aij, compressed row, sparse, parallel
2102 
2103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2104 @*/
2105 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2106 {
2107   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2108 
2109   PetscFunctionBegin;
2110   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2111   if (f) {
2112     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2113   }
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 EXTERN_C_BEGIN
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2120 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2121 {
2122   Mat_MPIBAIJ    *b;
2123   PetscErrorCode ierr;
2124   PetscInt       i;
2125 
2126   PetscFunctionBegin;
2127   B->preallocated = PETSC_TRUE;
2128   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2129 
2130   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2131   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2132   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2133   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2134   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2135   if (d_nnz) {
2136   for (i=0; i<B->m/bs; i++) {
2137       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2138     }
2139   }
2140   if (o_nnz) {
2141     for (i=0; i<B->m/bs; i++) {
2142       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2143     }
2144   }
2145 
2146   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2147   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2148   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2149   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2150 
2151   b = (Mat_MPIBAIJ*)B->data;
2152   B->bs  = bs;
2153   b->bs2 = bs*bs;
2154   b->mbs = B->m/bs;
2155   b->nbs = B->n/bs;
2156   b->Mbs = B->M/bs;
2157   b->Nbs = B->N/bs;
2158 
2159   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2160   b->rowners[0]    = 0;
2161   for (i=2; i<=b->size; i++) {
2162     b->rowners[i] += b->rowners[i-1];
2163   }
2164   b->rstart    = b->rowners[b->rank];
2165   b->rend      = b->rowners[b->rank+1];
2166 
2167   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2168   b->cowners[0] = 0;
2169   for (i=2; i<=b->size; i++) {
2170     b->cowners[i] += b->cowners[i-1];
2171   }
2172   b->cstart    = b->cowners[b->rank];
2173   b->cend      = b->cowners[b->rank+1];
2174 
2175   for (i=0; i<=b->size; i++) {
2176     b->rowners_bs[i] = b->rowners[i]*bs;
2177   }
2178   b->rstart_bs = b->rstart*bs;
2179   b->rend_bs   = b->rend*bs;
2180   b->cstart_bs = b->cstart*bs;
2181   b->cend_bs   = b->cend*bs;
2182 
2183   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2184   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2185   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2186   PetscLogObjectParent(B,b->A);
2187   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2188   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2189   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2190   PetscLogObjectParent(B,b->B);
2191 
2192   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2193 
2194   PetscFunctionReturn(0);
2195 }
2196 EXTERN_C_END
2197 
2198 EXTERN_C_BEGIN
2199 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2200 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2201 EXTERN_C_END
2202 
2203 /*MC
2204    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2205 
2206    Options Database Keys:
2207 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2208 
2209   Level: beginner
2210 
2211 .seealso: MatCreateMPIBAIJ
2212 M*/
2213 
2214 EXTERN_C_BEGIN
2215 #undef __FUNCT__
2216 #define __FUNCT__ "MatCreate_MPIBAIJ"
2217 PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2218 {
2219   Mat_MPIBAIJ    *b;
2220   PetscErrorCode ierr;
2221   PetscTruth     flg;
2222 
2223   PetscFunctionBegin;
2224   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2225   B->data = (void*)b;
2226 
2227   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2228   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2229   B->mapping    = 0;
2230   B->factor     = 0;
2231   B->assembled  = PETSC_FALSE;
2232 
2233   B->insertmode = NOT_SET_VALUES;
2234   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2235   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2236 
2237   /* build local table of row and column ownerships */
2238   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2239   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2240   b->cowners    = b->rowners + b->size + 2;
2241   b->rowners_bs = b->cowners + b->size + 2;
2242 
2243   /* build cache for off array entries formed */
2244   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2245   b->donotstash  = PETSC_FALSE;
2246   b->colmap      = PETSC_NULL;
2247   b->garray      = PETSC_NULL;
2248   b->roworiented = PETSC_TRUE;
2249 
2250 #if defined(PETSC_USE_MAT_SINGLE)
2251   /* stuff for MatSetValues_XXX in single precision */
2252   b->setvalueslen     = 0;
2253   b->setvaluescopy    = PETSC_NULL;
2254 #endif
2255 
2256   /* stuff used in block assembly */
2257   b->barray       = 0;
2258 
2259   /* stuff used for matrix vector multiply */
2260   b->lvec         = 0;
2261   b->Mvctx        = 0;
2262 
2263   /* stuff for MatGetRow() */
2264   b->rowindices   = 0;
2265   b->rowvalues    = 0;
2266   b->getrowactive = PETSC_FALSE;
2267 
2268   /* hash table stuff */
2269   b->ht           = 0;
2270   b->hd           = 0;
2271   b->ht_size      = 0;
2272   b->ht_flag      = PETSC_FALSE;
2273   b->ht_fact      = 0;
2274   b->ht_total_ct  = 0;
2275   b->ht_insert_ct = 0;
2276 
2277   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2278   if (flg) {
2279     PetscReal fact = 1.39;
2280     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2281     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2282     if (fact <= 1.0) fact = 1.39;
2283     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2284     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2285   }
2286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2287                                      "MatStoreValues_MPIBAIJ",
2288                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2289   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2290                                      "MatRetrieveValues_MPIBAIJ",
2291                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2293                                      "MatGetDiagonalBlock_MPIBAIJ",
2294                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2295   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2296                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2297                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2298   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2299 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2300 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2301   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2302                                      "MatDiagonalScaleLocal_MPIBAIJ",
2303                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2304   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2305                                      "MatSetHashTableFactor_MPIBAIJ",
2306                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 EXTERN_C_END
2310 
2311 /*MC
2312    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2313 
2314    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2315    and MATMPIBAIJ otherwise.
2316 
2317    Options Database Keys:
2318 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2319 
2320   Level: beginner
2321 
2322 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2323 M*/
2324 
2325 EXTERN_C_BEGIN
2326 #undef __FUNCT__
2327 #define __FUNCT__ "MatCreate_BAIJ"
2328 PetscErrorCode MatCreate_BAIJ(Mat A)
2329 {
2330   PetscErrorCode ierr;
2331   PetscMPIInt    size;
2332 
2333   PetscFunctionBegin;
2334   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2335   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2336   if (size == 1) {
2337     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2338   } else {
2339     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2340   }
2341   PetscFunctionReturn(0);
2342 }
2343 EXTERN_C_END
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2347 /*@C
2348    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2349    (block compressed row).  For good matrix assembly performance
2350    the user should preallocate the matrix storage by setting the parameters
2351    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2352    performance can be increased by more than a factor of 50.
2353 
2354    Collective on Mat
2355 
2356    Input Parameters:
2357 +  A - the matrix
2358 .  bs   - size of blockk
2359 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2360            submatrix  (same for all local rows)
2361 .  d_nnz - array containing the number of block nonzeros in the various block rows
2362            of the in diagonal portion of the local (possibly different for each block
2363            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2364 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2365            submatrix (same for all local rows).
2366 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2367            off-diagonal portion of the local submatrix (possibly different for
2368            each block row) or PETSC_NULL.
2369 
2370    If the *_nnz parameter is given then the *_nz parameter is ignored
2371 
2372    Options Database Keys:
2373 .   -mat_no_unroll - uses code that does not unroll the loops in the
2374                      block calculations (much slower)
2375 .   -mat_block_size - size of the blocks to use
2376 
2377    Notes:
2378    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2379    than it must be used on all processors that share the object for that argument.
2380 
2381    Storage Information:
2382    For a square global matrix we define each processor's diagonal portion
2383    to be its local rows and the corresponding columns (a square submatrix);
2384    each processor's off-diagonal portion encompasses the remainder of the
2385    local matrix (a rectangular submatrix).
2386 
2387    The user can specify preallocated storage for the diagonal part of
2388    the local submatrix with either d_nz or d_nnz (not both).  Set
2389    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2390    memory allocation.  Likewise, specify preallocated storage for the
2391    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2392 
2393    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2394    the figure below we depict these three local rows and all columns (0-11).
2395 
2396 .vb
2397            0 1 2 3 4 5 6 7 8 9 10 11
2398           -------------------
2399    row 3  |  o o o d d d o o o o o o
2400    row 4  |  o o o d d d o o o o o o
2401    row 5  |  o o o d d d o o o o o o
2402           -------------------
2403 .ve
2404 
2405    Thus, any entries in the d locations are stored in the d (diagonal)
2406    submatrix, and any entries in the o locations are stored in the
2407    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2408    stored simply in the MATSEQBAIJ format for compressed row storage.
2409 
2410    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2411    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2412    In general, for PDE problems in which most nonzeros are near the diagonal,
2413    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2414    or you will get TERRIBLE performance; see the users' manual chapter on
2415    matrices.
2416 
2417    Level: intermediate
2418 
2419 .keywords: matrix, block, aij, compressed row, sparse, parallel
2420 
2421 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2422 @*/
2423 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2424 {
2425   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2429   if (f) {
2430     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2431   }
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatCreateMPIBAIJ"
2437 /*@C
2438    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2439    (block compressed row).  For good matrix assembly performance
2440    the user should preallocate the matrix storage by setting the parameters
2441    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2442    performance can be increased by more than a factor of 50.
2443 
2444    Collective on MPI_Comm
2445 
2446    Input Parameters:
2447 +  comm - MPI communicator
2448 .  bs   - size of blockk
2449 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2450            This value should be the same as the local size used in creating the
2451            y vector for the matrix-vector product y = Ax.
2452 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2453            This value should be the same as the local size used in creating the
2454            x vector for the matrix-vector product y = Ax.
2455 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2456 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2457 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2458            submatrix  (same for all local rows)
2459 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2460            of the in diagonal portion of the local (possibly different for each block
2461            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2462 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2463            submatrix (same for all local rows).
2464 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2465            off-diagonal portion of the local submatrix (possibly different for
2466            each block row) or PETSC_NULL.
2467 
2468    Output Parameter:
2469 .  A - the matrix
2470 
2471    Options Database Keys:
2472 .   -mat_no_unroll - uses code that does not unroll the loops in the
2473                      block calculations (much slower)
2474 .   -mat_block_size - size of the blocks to use
2475 
2476    Notes:
2477    If the *_nnz parameter is given then the *_nz parameter is ignored
2478 
2479    A nonzero block is any block that as 1 or more nonzeros in it
2480 
2481    The user MUST specify either the local or global matrix dimensions
2482    (possibly both).
2483 
2484    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2485    than it must be used on all processors that share the object for that argument.
2486 
2487    Storage Information:
2488    For a square global matrix we define each processor's diagonal portion
2489    to be its local rows and the corresponding columns (a square submatrix);
2490    each processor's off-diagonal portion encompasses the remainder of the
2491    local matrix (a rectangular submatrix).
2492 
2493    The user can specify preallocated storage for the diagonal part of
2494    the local submatrix with either d_nz or d_nnz (not both).  Set
2495    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2496    memory allocation.  Likewise, specify preallocated storage for the
2497    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2498 
2499    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2500    the figure below we depict these three local rows and all columns (0-11).
2501 
2502 .vb
2503            0 1 2 3 4 5 6 7 8 9 10 11
2504           -------------------
2505    row 3  |  o o o d d d o o o o o o
2506    row 4  |  o o o d d d o o o o o o
2507    row 5  |  o o o d d d o o o o o o
2508           -------------------
2509 .ve
2510 
2511    Thus, any entries in the d locations are stored in the d (diagonal)
2512    submatrix, and any entries in the o locations are stored in the
2513    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2514    stored simply in the MATSEQBAIJ format for compressed row storage.
2515 
2516    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2517    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2518    In general, for PDE problems in which most nonzeros are near the diagonal,
2519    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2520    or you will get TERRIBLE performance; see the users' manual chapter on
2521    matrices.
2522 
2523    Level: intermediate
2524 
2525 .keywords: matrix, block, aij, compressed row, sparse, parallel
2526 
2527 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2528 @*/
2529 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2530 {
2531   PetscErrorCode ierr;
2532   PetscMPIInt    size;
2533 
2534   PetscFunctionBegin;
2535   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2536   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2537   if (size > 1) {
2538     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2539     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2540   } else {
2541     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2542     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2543   }
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2549 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2550 {
2551   Mat            mat;
2552   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2553   PetscErrorCode ierr;
2554   PetscInt       len=0;
2555 
2556   PetscFunctionBegin;
2557   *newmat       = 0;
2558   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2559   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2560   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2561 
2562   mat->factor       = matin->factor;
2563   mat->preallocated = PETSC_TRUE;
2564   mat->assembled    = PETSC_TRUE;
2565   mat->insertmode   = NOT_SET_VALUES;
2566 
2567   a      = (Mat_MPIBAIJ*)mat->data;
2568   mat->bs  = matin->bs;
2569   a->bs2   = oldmat->bs2;
2570   a->mbs   = oldmat->mbs;
2571   a->nbs   = oldmat->nbs;
2572   a->Mbs   = oldmat->Mbs;
2573   a->Nbs   = oldmat->Nbs;
2574 
2575   a->rstart       = oldmat->rstart;
2576   a->rend         = oldmat->rend;
2577   a->cstart       = oldmat->cstart;
2578   a->cend         = oldmat->cend;
2579   a->size         = oldmat->size;
2580   a->rank         = oldmat->rank;
2581   a->donotstash   = oldmat->donotstash;
2582   a->roworiented  = oldmat->roworiented;
2583   a->rowindices   = 0;
2584   a->rowvalues    = 0;
2585   a->getrowactive = PETSC_FALSE;
2586   a->barray       = 0;
2587   a->rstart_bs    = oldmat->rstart_bs;
2588   a->rend_bs      = oldmat->rend_bs;
2589   a->cstart_bs    = oldmat->cstart_bs;
2590   a->cend_bs      = oldmat->cend_bs;
2591 
2592   /* hash table stuff */
2593   a->ht           = 0;
2594   a->hd           = 0;
2595   a->ht_size      = 0;
2596   a->ht_flag      = oldmat->ht_flag;
2597   a->ht_fact      = oldmat->ht_fact;
2598   a->ht_total_ct  = 0;
2599   a->ht_insert_ct = 0;
2600 
2601   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2602   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2603   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2604   if (oldmat->colmap) {
2605 #if defined (PETSC_USE_CTABLE)
2606   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2607 #else
2608   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2609   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2610   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2611 #endif
2612   } else a->colmap = 0;
2613 
2614   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2615     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2616     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2617     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2618   } else a->garray = 0;
2619 
2620   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2621   PetscLogObjectParent(mat,a->lvec);
2622   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2623   PetscLogObjectParent(mat,a->Mvctx);
2624 
2625   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2626   PetscLogObjectParent(mat,a->A);
2627   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2628   PetscLogObjectParent(mat,a->B);
2629   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2630   *newmat = mat;
2631 
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #include "petscsys.h"
2636 
2637 #undef __FUNCT__
2638 #define __FUNCT__ "MatLoad_MPIBAIJ"
2639 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2640 {
2641   Mat            A;
2642   PetscErrorCode ierr;
2643   int            fd;
2644   PetscInt       i,nz,j,rstart,rend;
2645   PetscScalar    *vals,*buf;
2646   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2647   MPI_Status     status;
2648   PetscMPIInt    rank,size,maxnz;
2649   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2650   PetscInt       *locrowlens,*procsnz = 0,*browners;
2651   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows;
2652   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2653   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2654   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2655 
2656   PetscFunctionBegin;
2657   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2658 
2659   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2660   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2661   if (!rank) {
2662     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2663     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2664     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2665   }
2666 
2667   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2668   M = header[1]; N = header[2];
2669 
2670   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2671 
2672   /*
2673      This code adds extra rows to make sure the number of rows is
2674      divisible by the blocksize
2675   */
2676   Mbs        = M/bs;
2677   extra_rows = bs - M + bs*Mbs;
2678   if (extra_rows == bs) extra_rows = 0;
2679   else                  Mbs++;
2680   if (extra_rows && !rank) {
2681     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2682   }
2683 
2684   /* determine ownership of all rows */
2685   mbs        = Mbs/size + ((Mbs % size) > rank);
2686   m          = mbs*bs;
2687   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2688   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2689   rowners[0] = 0;
2690   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2691   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2692   rstart = rowners[rank];
2693   rend   = rowners[rank+1];
2694 
2695   /* distribute row lengths to all processors */
2696   ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2697   if (!rank) {
2698     mend = m;
2699     if (size == 1) mend = mend - extra_rows;
2700     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2701     for (j=mend; j<m; j++) locrowlens[j] = 1;
2702     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2703     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2704     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2705     for (j=0; j<m; j++) {
2706       procsnz[0] += locrowlens[j];
2707     }
2708     for (i=1; i<size; i++) {
2709       mend = browners[i+1] - browners[i];
2710       if (i == size-1) mend = mend - extra_rows;
2711       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2712       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2713       /* calculate the number of nonzeros on each processor */
2714       for (j=0; j<browners[i+1]-browners[i]; j++) {
2715         procsnz[i] += rowlengths[j];
2716       }
2717       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2718     }
2719     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2720   } else {
2721     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2722   }
2723 
2724   if (!rank) {
2725     /* determine max buffer needed and allocate it */
2726     maxnz = procsnz[0];
2727     for (i=1; i<size; i++) {
2728       maxnz = PetscMax(maxnz,procsnz[i]);
2729     }
2730     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2731 
2732     /* read in my part of the matrix column indices  */
2733     nz     = procsnz[0];
2734     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2735     mycols = ibuf;
2736     if (size == 1)  nz -= extra_rows;
2737     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2738     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2739 
2740     /* read in every ones (except the last) and ship off */
2741     for (i=1; i<size-1; i++) {
2742       nz   = procsnz[i];
2743       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2744       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2745     }
2746     /* read in the stuff for the last proc */
2747     if (size != 1) {
2748       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2749       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2750       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2751       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2752     }
2753     ierr = PetscFree(cols);CHKERRQ(ierr);
2754   } else {
2755     /* determine buffer space needed for message */
2756     nz = 0;
2757     for (i=0; i<m; i++) {
2758       nz += locrowlens[i];
2759     }
2760     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2761     mycols = ibuf;
2762     /* receive message of column indices*/
2763     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2764     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2765     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2766   }
2767 
2768   /* loop over local rows, determining number of off diagonal entries */
2769   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2770   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2771   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2772   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2773   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2774   rowcount = 0; nzcount = 0;
2775   for (i=0; i<mbs; i++) {
2776     dcount  = 0;
2777     odcount = 0;
2778     for (j=0; j<bs; j++) {
2779       kmax = locrowlens[rowcount];
2780       for (k=0; k<kmax; k++) {
2781         tmp = mycols[nzcount++]/bs;
2782         if (!mask[tmp]) {
2783           mask[tmp] = 1;
2784           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2785           else masked1[dcount++] = tmp;
2786         }
2787       }
2788       rowcount++;
2789     }
2790 
2791     dlens[i]  = dcount;
2792     odlens[i] = odcount;
2793 
2794     /* zero out the mask elements we set */
2795     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2796     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2797   }
2798 
2799   /* create our matrix */
2800   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2801   ierr = MatSetType(A,type);CHKERRQ(ierr)
2802   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2803 
2804   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2805   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2806 
2807   if (!rank) {
2808     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2809     /* read in my part of the matrix numerical values  */
2810     nz = procsnz[0];
2811     vals = buf;
2812     mycols = ibuf;
2813     if (size == 1)  nz -= extra_rows;
2814     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2815     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2816 
2817     /* insert into matrix */
2818     jj      = rstart*bs;
2819     for (i=0; i<m; i++) {
2820       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2821       mycols += locrowlens[i];
2822       vals   += locrowlens[i];
2823       jj++;
2824     }
2825     /* read in other processors (except the last one) and ship out */
2826     for (i=1; i<size-1; i++) {
2827       nz   = procsnz[i];
2828       vals = buf;
2829       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2830       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2831     }
2832     /* the last proc */
2833     if (size != 1){
2834       nz   = procsnz[i] - extra_rows;
2835       vals = buf;
2836       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2837       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2838       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2839     }
2840     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2841   } else {
2842     /* receive numeric values */
2843     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2844 
2845     /* receive message of values*/
2846     vals   = buf;
2847     mycols = ibuf;
2848     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2849     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2850     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2851 
2852     /* insert into matrix */
2853     jj      = rstart*bs;
2854     for (i=0; i<m; i++) {
2855       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2856       mycols += locrowlens[i];
2857       vals   += locrowlens[i];
2858       jj++;
2859     }
2860   }
2861   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2862   ierr = PetscFree(buf);CHKERRQ(ierr);
2863   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2864   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2865   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2866   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2867   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2868   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2869 
2870   *newmat = A;
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2876 /*@
2877    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2878 
2879    Input Parameters:
2880 .  mat  - the matrix
2881 .  fact - factor
2882 
2883    Collective on Mat
2884 
2885    Level: advanced
2886 
2887   Notes:
2888    This can also be set by the command line option: -mat_use_hash_table fact
2889 
2890 .keywords: matrix, hashtable, factor, HT
2891 
2892 .seealso: MatSetOption()
2893 @*/
2894 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2895 {
2896   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2897 
2898   PetscFunctionBegin;
2899   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2900   if (f) {
2901     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2902   }
2903   PetscFunctionReturn(0);
2904 }
2905 
2906 #undef __FUNCT__
2907 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2908 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2909 {
2910   Mat_MPIBAIJ *baij;
2911 
2912   PetscFunctionBegin;
2913   baij = (Mat_MPIBAIJ*)mat->data;
2914   baij->ht_fact = fact;
2915   PetscFunctionReturn(0);
2916 }
2917 
2918 #undef __FUNCT__
2919 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2920 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2921 {
2922   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2923   PetscFunctionBegin;
2924   *Ad     = a->A;
2925   *Ao     = a->B;
2926   *colmap = a->garray;
2927   PetscFunctionReturn(0);
2928 }
2929