/* xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision dcf5cc722893849f9d6a49faec7bb9e7955fb956) */

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist; the ..._MatScalar() names below are simply #defined to the ordinary functions.
   Otherwise ..._MatScalar() takes matrix elements that are already in single precision and
   inserts them directly into the single precision data structures, while
   MatSetValuesBlocked_MPIBAIJ() first converts the entries into single precision and then
   calls ..._MatScalar() to store them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
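
/*
   Illustration of how the aliasing above plays out (a sketch, not part of
   the build). With PETSC_USE_MAT_SINGLE defined, MatScalar is single
   precision while PetscScalar stays double, so a conversion copy is needed
   before the values reach the matrix storage:

       PetscScalar vd[4] = {1.0,2.0,3.0,4.0};    // caller data, double
       MatScalar   vs[4];                        // matrix storage precision
       PetscInt    i;
       for (i=0; i<4; i++) vs[i] = (MatScalar)vd[i];
       // MatSetValues_MPIBAIJ() below performs exactly this copy and then
       // calls MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vs,addv);

   Without PETSC_USE_MAT_SINGLE the two types are identical, the #define
   aliases apply, and no copy is made.
*/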

/*
   Computes, for each local row, the entry of largest absolute value, taking
   the maximum over the diagonal part (a->A) and the off-diagonal part (a->B).
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
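
/*
   How colmap is consumed elsewhere in this file (a sketch of the
   non-CTABLE branch; with CTABLE the same lookup goes through
   PetscTableFind()). For a global column gcol, the owning block is
   gcol/bs; colmap maps that global block number to one plus the offset of
   the block inside the off-diagonal part B, so that 0 can mean "absent":

       PetscInt gcol = ...;                       // global column index
       PetscInt lcol = baij->colmap[gcol/bs] - 1; // negative if absent
       if (lcol >= 0) lcol += gcol % bs;          // scalar column within B
*/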

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
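
/*
   Both macros above implement the same technique: within block row brow the
   block-column indices rp[0..nrow-1] are kept sorted, so a short bisection
   narrows the range and a linear scan locates the column. A condensed sketch
   of just that search (hypothetical helper, for illustration only):

       static PetscInt FindBlockCol(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
       {
         PetscInt low = 0,high = nrow,t,i;
         while (high-low > 3) {              // bisect until the range is small
           t = (low+high)/2;
           if (rp[t] > bcol) high = t;
           else              low  = t;
         }
         for (i=low; i<high; i++) {
           if (rp[i] == bcol) return i;      // hit: insert/add at block i
           if (rp[i] > bcol) break;          // miss: i is the insertion slot
         }
         return -(i+1);                      // encode "insert before block i"
       }

   On a miss the macros shift rp[i..nrow-1] and the corresponding bs2-sized
   value blocks up by one slot, first enlarging the storage by CHUNKSIZE
   blocks if the row is already full.
*/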

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is a single block column (row oriented) or a single block row
           (column oriented) then the block in v can be used in place; no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
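
/*
   HASH() is multiplicative hashing with the golden-ratio constant: the
   fractional part of key*HASH_KEY is spread roughly uniformly in [0,1), and
   scaling by the table size picks the slot. A worked example (sketch):

       key = 7, size = 11
       7*0.6180339887   = 4.3262379209
       fractional part  = 0.3262379209
       0.3262379209*11  = 3.588...        ->  h1 = 3

   Collisions are resolved by the linear probing in the routines below: scan
   forward from h1 to the end of the table, then wrap around to the front.
*/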
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry was found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstart;
  PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,bs2=baij->bs2;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and populates the hash table. The table is created only once;
  if new entries need to be added to the matrix then the hash table
  has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
  PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_BOPT_g)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate memory for the hash table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
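
/*
   The contract between the table built above and the lookups in
   MatSetValues_MPIBAIJ_HT_MatScalar() (a sketch): keys fold the global
   block coordinates into a single integer,

       PetscInt key = row*Nbs + col + 1;   // +1 keeps 0 free to mean "empty"
       PetscInt h1  = HASH(size,key,tmp);  // then probe linearly from h1

   and ht_size = factor*nz slots are allocated for the nz local blocks, so
   with factor > 1 the linear probe chains stay short.
*/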

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
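
/*
   Caller-side pattern that exercises the stash logic above (a minimal
   sketch; setup and error handling elided). Rows owned by another process
   are legal in MatSetValues(): such values are stashed locally and shipped
   to their owners by MatAssemblyBegin()/MatAssemblyEnd():

       ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   where grow and gcol are global indices that need not be local to this
   process.
*/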

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  /* start communicating the ghost values of xx needed by the off-diagonal part */
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* overlap the communication with the multiply by the diagonal block */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* add in the off-diagonal block applied to the now-arrived ghost values */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this implementation
     but may not always be true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but may not always be true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1377 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1378 {
1379   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1380   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1381   PetscErrorCode ierr;
1382   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1383   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1384   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1385 
1386   PetscFunctionBegin;
1387   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1388   mat->getrowactive = PETSC_TRUE;
1389 
1390   if (!mat->rowvalues && (idx || v)) {
1391     /*
1392         allocate enough space to hold information from the longest row.
1393     */
1394     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1395     PetscInt     max = 1,mbs = mat->mbs,tmp;
1396     for (i=0; i<mbs; i++) {
1397       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1398       if (max < tmp) { max = tmp; }
1399     }
1400     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1401     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1402   }
1403 
1404   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1405   lrow = row - brstart;
1406 
1407   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1408   if (!v)   {pvA = 0; pvB = 0;}
1409   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1410   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1411   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1412   nztot = nzA + nzB;
1413 
1414   cmap  = mat->garray;
1415   if (v  || idx) {
1416     if (nztot) {
1417       /* Sort by increasing column numbers, assuming A and B already sorted */
1418       PetscInt imark = -1;
1419       if (v) {
1420         *v = v_p = mat->rowvalues;
1421         for (i=0; i<nzB; i++) {
1422           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1423           else break;
1424         }
1425         imark = i;
1426         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1427         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1428       }
1429       if (idx) {
1430         *idx = idx_p = mat->rowindices;
1431         if (imark > -1) {
1432           for (i=0; i<imark; i++) {
1433             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1434           }
1435         } else {
1436           for (i=0; i<nzB; i++) {
1437             if (cmap[cworkB[i]/bs] < cstart)
1438               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1439             else break;
1440           }
1441           imark = i;
1442         }
1443         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1444         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1445       }
1446     } else {
1447       if (idx) *idx = 0;
1448       if (v)   *v   = 0;
1449     }
1450   }
1451   *nz = nztot;
1452   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1453   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1454   PetscFunctionReturn(0);
1455 }
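/*
   A minimal usage sketch for the row access routines above, through the
   public MatGetRow()/MatRestoreRow() interface (only locally owned rows
   may be queried, and each MatGetRow() must be matched by a
   MatRestoreRow() before the next call):

       PetscInt    row,rstart,rend,ncols,*cols;
       PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/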
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1459 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1460 {
1461   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1462 
1463   PetscFunctionBegin;
1464   if (baij->getrowactive == PETSC_FALSE) {
1465     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1466   }
1467   baij->getrowactive = PETSC_FALSE;
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1473 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1474 {
1475   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1480   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1486 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1487 {
1488   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1489   Mat            A = a->A,B = a->B;
1490   PetscErrorCode ierr;
1491   PetscReal      isend[5],irecv[5];
1492 
1493   PetscFunctionBegin;
1494   info->block_size     = (PetscReal)matin->bs;
1495   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1496   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1497   isend[3] = info->memory;  isend[4] = info->mallocs;
1498   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1499   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1500   isend[3] += info->memory;  isend[4] += info->mallocs;
1501   if (flag == MAT_LOCAL) {
1502     info->nz_used      = isend[0];
1503     info->nz_allocated = isend[1];
1504     info->nz_unneeded  = isend[2];
1505     info->memory       = isend[3];
1506     info->mallocs      = isend[4];
1507   } else if (flag == MAT_GLOBAL_MAX) {
1508     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1509     info->nz_used      = irecv[0];
1510     info->nz_allocated = irecv[1];
1511     info->nz_unneeded  = irecv[2];
1512     info->memory       = irecv[3];
1513     info->mallocs      = irecv[4];
1514   } else if (flag == MAT_GLOBAL_SUM) {
1515     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1516     info->nz_used      = irecv[0];
1517     info->nz_allocated = irecv[1];
1518     info->nz_unneeded  = irecv[2];
1519     info->memory       = irecv[3];
1520     info->mallocs      = irecv[4];
1521   } else {
1522     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1523   }
1524   info->rows_global       = (PetscReal)A->M;
1525   info->columns_global    = (PetscReal)A->N;
1526   info->rows_local        = (PetscReal)A->m;
1527   info->columns_local     = (PetscReal)A->n;
1528   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1529   info->fill_ratio_needed = 0;
1530   info->factor_mallocs    = 0;
1531   PetscFunctionReturn(0);
1532 }
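/*
   A minimal sketch of querying the information gathered above through the
   public interface (the MatInfo fields are PetscReal):

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz_used %g nz_allocated %g mallocs %g\n",
                          info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/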
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1536 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1537 {
1538   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   switch (op) {
1543   case MAT_NO_NEW_NONZERO_LOCATIONS:
1544   case MAT_YES_NEW_NONZERO_LOCATIONS:
1545   case MAT_COLUMNS_UNSORTED:
1546   case MAT_COLUMNS_SORTED:
1547   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1548   case MAT_KEEP_ZEROED_ROWS:
1549   case MAT_NEW_NONZERO_LOCATION_ERR:
1550     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1551     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1552     break;
1553   case MAT_ROW_ORIENTED:
1554     a->roworiented = PETSC_TRUE;
1555     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1556     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1557     break;
1558   case MAT_ROWS_SORTED:
1559   case MAT_ROWS_UNSORTED:
1560   case MAT_YES_NEW_DIAGONALS:
1561     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1562     break;
1563   case MAT_COLUMN_ORIENTED:
1564     a->roworiented = PETSC_FALSE;
1565     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1566     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1567     break;
1568   case MAT_IGNORE_OFF_PROC_ENTRIES:
1569     a->donotstash = PETSC_TRUE;
1570     break;
1571   case MAT_NO_NEW_DIAGONALS:
1572     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1573   case MAT_USE_HASH_TABLE:
1574     a->ht_flag = PETSC_TRUE;
1575     break;
1576   case MAT_SYMMETRIC:
1577   case MAT_STRUCTURALLY_SYMMETRIC:
1578   case MAT_HERMITIAN:
1579   case MAT_SYMMETRY_ETERNAL:
1580     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1581     break;
1582   case MAT_NOT_SYMMETRIC:
1583   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1584   case MAT_NOT_HERMITIAN:
1585   case MAT_NOT_SYMMETRY_ETERNAL:
1586     break;
1587   default:
1588     SETERRQ(PETSC_ERR_SUP,"unknown option");
1589   }
1590   PetscFunctionReturn(0);
1591 }
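/*
   For example, hash table based assembly (the MAT_USE_HASH_TABLE case
   above) is enabled before any values are set with

       ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);

   or with the command line option -mat_use_hash_table <factor>.
*/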
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1595 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1596 {
1597   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1598   Mat_SeqBAIJ    *Aloc;
1599   Mat            B;
1600   PetscErrorCode ierr;
1601   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1602   PetscInt       bs=A->bs,mbs=baij->mbs;
1603   MatScalar      *a;
1604 
1605   PetscFunctionBegin;
1606   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1607   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1608   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1609   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1610 
1611   /* copy over the A part */
1612   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1613   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1614   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1615 
1616   for (i=0; i<mbs; i++) {
1617     rvals[0] = bs*(baij->rstart + i);
1618     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1619     for (j=ai[i]; j<ai[i+1]; j++) {
1620       col = (baij->cstart+aj[j])*bs;
1621       for (k=0; k<bs; k++) {
1622         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1623         col++; a += bs;
1624       }
1625     }
1626   }
1627   /* copy over the B part */
1628   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1629   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1630   for (i=0; i<mbs; i++) {
1631     rvals[0] = bs*(baij->rstart + i);
1632     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1633     for (j=ai[i]; j<ai[i+1]; j++) {
1634       col = baij->garray[aj[j]]*bs;
1635       for (k=0; k<bs; k++) {
1636         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1637         col++; a += bs;
1638       }
1639     }
1640   }
1641   ierr = PetscFree(rvals);CHKERRQ(ierr);
1642   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644 
1645   if (matout) {
1646     *matout = B;
1647   } else {
1648     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1649   }
1650   PetscFunctionReturn(0);
1651 }
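/*
   Usage sketch for the transpose above via the public interface: pass a
   Mat pointer for an out-of-place transpose, or PETSC_NULL to transpose a
   square matrix in place:

       Mat At;
       ierr = MatTranspose(A,&At);CHKERRQ(ierr);
       ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);
*/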
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1655 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1656 {
1657   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1658   Mat            a = baij->A,b = baij->B;
1659   PetscErrorCode ierr;
1660   PetscInt       s1,s2,s3;
1661 
1662   PetscFunctionBegin;
1663   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1664   if (rr) {
1665     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1666     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1667     /* Overlap communication with computation. */
1668     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1669   }
1670   if (ll) {
1671     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1672     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1673     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1674   }
1675   /* scale  the diagonal block */
1676   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1677 
1678   if (rr) {
1679     /* Do a scatter end and then right scale the off-diagonal block */
1680     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1681     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1682   }
1683 
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1689 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1690 {
1691   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1692   PetscErrorCode ierr;
1693   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1694   PetscInt       i,N,*rows,*owners = l->rowners;
1695   PetscInt       *nprocs,j,idx,nsends,row;
1696   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1697   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
1698   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1699   MPI_Comm       comm = A->comm;
1700   MPI_Request    *send_waits,*recv_waits;
1701   MPI_Status     recv_status,*send_status;
1702   IS             istmp;
1703   PetscTruth     found;
1704 
1705   PetscFunctionBegin;
1706   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1707   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1708 
1709   /*  first count number of contributors to each processor */
1710   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1711   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1712   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1713   for (i=0; i<N; i++) {
1714     idx   = rows[i];
1715     found = PETSC_FALSE;
1716     for (j=0; j<size; j++) {
1717       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1718         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1719       }
1720     }
1721     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1722   }
1723   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1724 
1725   /* inform other processors of number of messages and max length*/
1726   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1727 
1728   /* post receives:   */
1729   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1730   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1731   for (i=0; i<nrecvs; i++) {
1732     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1733   }
1734 
1735   /* do sends:
1736      1) starts[i] gives the starting index in svalues for stuff going to
1737      the ith processor
1738   */
1739   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1740   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1741   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1742   starts[0]  = 0;
1743   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1744   for (i=0; i<N; i++) {
1745     svalues[starts[owner[i]]++] = rows[i];
1746   }
1747   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1748 
1749   starts[0] = 0;
1750   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1751   count = 0;
1752   for (i=0; i<size; i++) {
1753     if (nprocs[2*i+1]) {
1754       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1755     }
1756   }
1757   ierr = PetscFree(starts);CHKERRQ(ierr);
1758 
1759   base = owners[rank]*bs;
1760 
1761   /*  wait on receives */
1762   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1763   source = lens + nrecvs;
1764   count  = nrecvs; slen = 0;
1765   while (count) {
1766     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1767     /* unpack receives into our local space */
1768     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1769     source[imdex]  = recv_status.MPI_SOURCE;
1770     lens[imdex]    = n;
1771     slen          += n;
1772     count--;
1773   }
1774   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1775 
1776   /* move the data into the send scatter */
1777   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1778   count = 0;
1779   for (i=0; i<nrecvs; i++) {
1780     values = rvalues + i*nmax;
1781     for (j=0; j<lens[i]; j++) {
1782       lrows[count++] = values[j] - base;
1783     }
1784   }
1785   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1786   ierr = PetscFree(lens);CHKERRQ(ierr);
1787   ierr = PetscFree(owner);CHKERRQ(ierr);
1788   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1789 
1790   /* actually zap the local rows */
1791   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1792   PetscLogObjectParent(A,istmp);
1793 
1794   /*
1795         Zero the required rows. If the "diagonal block" of the matrix
1796      is square and the user wishes to set the diagonal, we use separate
1797      code so that MatSetValues() is not called for each diagonal entry,
1798      which would allocate new memory each time and slow things down.
1799 
1800        Contributed by: Matthew Knepley
1801   */
1802   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1803   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1804   if (diag && (l->A->M == l->A->N)) {
1805     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1806   } else if (diag) {
1807     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1808     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1809       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1810 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1811     }
1812     for (i=0; i<slen; i++) {
1813       row  = lrows[i] + rstart_bs;
1814       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1815     }
1816     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1817     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1818   } else {
1819     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1820   }
1821 
1822   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1823   ierr = PetscFree(lrows);CHKERRQ(ierr);
1824 
1825   /* wait on sends */
1826   if (nsends) {
1827     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1828     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1829     ierr = PetscFree(send_status);CHKERRQ(ierr);
1830   }
1831   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1832   ierr = PetscFree(svalues);CHKERRQ(ierr);
1833 
1834   PetscFunctionReturn(0);
1835 }
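/*
   A minimal usage sketch for the routine above: rows are global (point,
   not block) indices and need not be locally owned, since the machinery
   above routes them to their owners. The index values here are
   illustrative:

       IS          is;
       PetscInt    rows[2] = {0,5};
       PetscScalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);
*/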
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1839 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1840 {
1841   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1842   MPI_Comm          comm = A->comm;
1843   static PetscTruth called = PETSC_FALSE;
1844   PetscErrorCode    ierr;
1845 
1846   PetscFunctionBegin;
1847   if (!a->rank) {
1848     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1849   }
1850   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1851   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1852   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1858 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1859 {
1860   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1861   PetscErrorCode ierr;
1862 
1863   PetscFunctionBegin;
1864   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatEqual_MPIBAIJ"
1872 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1873 {
1874   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1875   Mat            a,b,c,d;
1876   PetscTruth     flg;
1877   PetscErrorCode ierr;
1878 
1879   PetscFunctionBegin;
1880   a = matA->A; b = matA->B;
1881   c = matB->A; d = matB->B;
1882 
1883   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1884   if (flg == PETSC_TRUE) {
1885     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1886   }
1887   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1894 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1895 {
1896   PetscErrorCode ierr;
1897 
1898   PetscFunctionBegin;
1899   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 /* -------------------------------------------------------------------*/
1904 static struct _MatOps MatOps_Values = {
1905        MatSetValues_MPIBAIJ,
1906        MatGetRow_MPIBAIJ,
1907        MatRestoreRow_MPIBAIJ,
1908        MatMult_MPIBAIJ,
1909 /* 4*/ MatMultAdd_MPIBAIJ,
1910        MatMultTranspose_MPIBAIJ,
1911        MatMultTransposeAdd_MPIBAIJ,
1912        0,
1913        0,
1914        0,
1915 /*10*/ 0,
1916        0,
1917        0,
1918        0,
1919        MatTranspose_MPIBAIJ,
1920 /*15*/ MatGetInfo_MPIBAIJ,
1921        MatEqual_MPIBAIJ,
1922        MatGetDiagonal_MPIBAIJ,
1923        MatDiagonalScale_MPIBAIJ,
1924        MatNorm_MPIBAIJ,
1925 /*20*/ MatAssemblyBegin_MPIBAIJ,
1926        MatAssemblyEnd_MPIBAIJ,
1927        0,
1928        MatSetOption_MPIBAIJ,
1929        MatZeroEntries_MPIBAIJ,
1930 /*25*/ MatZeroRows_MPIBAIJ,
1931        0,
1932        0,
1933        0,
1934        0,
1935 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1936        0,
1937        0,
1938        0,
1939        0,
1940 /*35*/ MatDuplicate_MPIBAIJ,
1941        0,
1942        0,
1943        0,
1944        0,
1945 /*40*/ 0,
1946        MatGetSubMatrices_MPIBAIJ,
1947        MatIncreaseOverlap_MPIBAIJ,
1948        MatGetValues_MPIBAIJ,
1949        0,
1950 /*45*/ MatPrintHelp_MPIBAIJ,
1951        MatScale_MPIBAIJ,
1952        0,
1953        0,
1954        0,
1955 /*50*/ 0,
1956        0,
1957        0,
1958        0,
1959        0,
1960 /*55*/ 0,
1961        0,
1962        MatSetUnfactored_MPIBAIJ,
1963        0,
1964        MatSetValuesBlocked_MPIBAIJ,
1965 /*60*/ 0,
1966        MatDestroy_MPIBAIJ,
1967        MatView_MPIBAIJ,
1968        MatGetPetscMaps_Petsc,
1969        0,
1970 /*65*/ 0,
1971        0,
1972        0,
1973        0,
1974        0,
1975 /*70*/ MatGetRowMax_MPIBAIJ,
1976        0,
1977        0,
1978        0,
1979        0,
1980 /*75*/ 0,
1981        0,
1982        0,
1983        0,
1984        0,
1985 /*80*/ 0,
1986        0,
1987        0,
1988        0,
1989        MatLoad_MPIBAIJ,
1990 /*85*/ 0,
1991        0,
1992        0,
1993        0,
1994        0,
1995 /*90*/ 0,
1996        0,
1997        0,
1998        0,
1999        0,
2000 /*95*/ 0,
2001        0,
2002        0,
2003        0};
2004 
2005 
2006 EXTERN_C_BEGIN
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2009 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2010 {
2011   PetscFunctionBegin;
2012   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2013   *iscopy = PETSC_FALSE;
2014   PetscFunctionReturn(0);
2015 }
2016 EXTERN_C_END
2017 
2018 EXTERN_C_BEGIN
2019 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2020 EXTERN_C_END
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2024 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2025 {
2026   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2027   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2028   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2029   const PetscInt *JJ;
2030   PetscScalar    *values;
2031   PetscErrorCode ierr;
2032 
2033   PetscFunctionBegin;
2034 #if defined(PETSC_OPT_g)
2035   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2036 #endif
2037   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2038   o_nnz = d_nnz + m;
2039 
2040   for (i=0; i<m; i++) {
2041     nnz     = I[i+1]- I[i];
2042     JJ      = J + I[i];
2043     nnz_max = PetscMax(nnz_max,nnz);
2044 #if defined(PETSC_OPT_g)
2045     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2046 #endif
2047     for (j=0; j<nnz; j++) {
2048       if (*JJ >= cstart) break;
2049       JJ++;
2050     }
2051     d = 0;
2052     for (; j<nnz; j++) {
2053       if (*JJ++ >= cend) break;
2054       d++;
2055     }
2056     d_nnz[i] = d;
2057     o_nnz[i] = nnz - d;
2058   }
2059   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2060   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2061 
2062   if (v) values = (PetscScalar*)v;
2063   else {
2064     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2065     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2066   }
2067 
2068   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2069   for (i=0; i<m; i++) {
2070     ii   = i + rstart;
2071     nnz  = I[i+1]- I[i];
2072     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2073   }
2074   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2076   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2077 
2078   if (!v) {
2079     ierr = PetscFree(values);CHKERRQ(ierr);
2080   }
2081   PetscFunctionReturn(0);
2082 }
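/*
   The d_nnz/o_nnz computation above splits each CSR row at the owned block
   column range [cstart,cend). As a worked example, on a process with
   cstart = 2 and cend = 5, a row with sorted block columns {0,3,4,7,9}
   yields d_nnz = 2 (columns 3 and 4) and o_nnz = 3 (columns 0, 7 and 9);
   this relies on the columns of each row being sorted, as required of the
   caller.
*/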
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2086 /*@C
2087    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ (BAIJ) format
2088 
2089    Collective on MPI_Comm
2090 
2091    Input Parameters:
2092 +  B - the matrix
2093 .  bs - the block size
2094 .  i - the indices into j for the start of each local row (starts with zero)
2095 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2096 -  v - optional values in the matrix
2097 
2098    Level: developer
2099 
2100 .keywords: matrix, baij, compressed row, sparse, parallel
2101 
2102 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2103 @*/
2104 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2105 {
2106   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2107 
2108   PetscFunctionBegin;
2109   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2110   if (f) {
2111     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2112   }
2113   PetscFunctionReturn(0);
2114 }
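/*
   A minimal usage sketch with illustrative CSR arrays: a process owning
   two block rows, the first with block columns {0,1} and the second with
   block column {1}, would pass

       PetscInt ii[3] = {0,2,3};
       PetscInt jj[3] = {0,1,1};
       ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);

   With v = PETSC_NULL the matrix is preallocated and assembled with
   zeroed blocks, as done in the implementation above.
*/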
2115 
2116 EXTERN_C_BEGIN
2117 #undef __FUNCT__
2118 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2119 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2120 {
2121   Mat_MPIBAIJ    *b;
2122   PetscErrorCode ierr;
2123   PetscInt       i;
2124 
2125   PetscFunctionBegin;
2126   B->preallocated = PETSC_TRUE;
2127   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2128 
2129   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2130   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2131   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2132   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2133   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2134   if (d_nnz) {
2135     for (i=0; i<B->m/bs; i++) {
2136       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2137     }
2138   }
2139   if (o_nnz) {
2140     for (i=0; i<B->m/bs; i++) {
2141       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2142     }
2143   }
2144 
2145   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2146   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2147   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2148   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2149 
2150   b = (Mat_MPIBAIJ*)B->data;
2151   B->bs  = bs;
2152   b->bs2 = bs*bs;
2153   b->mbs = B->m/bs;
2154   b->nbs = B->n/bs;
2155   b->Mbs = B->M/bs;
2156   b->Nbs = B->N/bs;
2157 
2158   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2159   b->rowners[0]    = 0;
2160   for (i=2; i<=b->size; i++) {
2161     b->rowners[i] += b->rowners[i-1];
2162   }
2163   b->rstart    = b->rowners[b->rank];
2164   b->rend      = b->rowners[b->rank+1];
2165 
2166   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2167   b->cowners[0] = 0;
2168   for (i=2; i<=b->size; i++) {
2169     b->cowners[i] += b->cowners[i-1];
2170   }
2171   b->cstart    = b->cowners[b->rank];
2172   b->cend      = b->cowners[b->rank+1];
2173 
2174   for (i=0; i<=b->size; i++) {
2175     b->rowners_bs[i] = b->rowners[i]*bs;
2176   }
2177   b->rstart_bs = b->rstart*bs;
2178   b->rend_bs   = b->rend*bs;
2179   b->cstart_bs = b->cstart*bs;
2180   b->cend_bs   = b->cend*bs;
2181 
2182   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2183   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2184   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2185   PetscLogObjectParent(B,b->A);
2186   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2187   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2188   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2189   PetscLogObjectParent(B,b->B);
2190 
2191   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2192 
2193   PetscFunctionReturn(0);
2194 }
2195 EXTERN_C_END
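/*
   The Allgather/partial-sum code above converts per-process block counts
   into ownership ranges. As a worked example, with mbs = {3,2,4} on three
   processes the rowners array becomes {0,3,5,9}; process 1 then owns block
   rows [rstart,rend) = [3,5) and, for bs = 2, point rows [6,10).
*/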
2196 
2197 EXTERN_C_BEGIN
2198 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2199 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2200 EXTERN_C_END
2201 
2202 /*MC
2203    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2204 
2205    Options Database Keys:
2206 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2207 
2208   Level: beginner
2209 
2210 .seealso: MatCreateMPIBAIJ
2211 M*/
2212 
2213 EXTERN_C_BEGIN
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatCreate_MPIBAIJ"
2216 PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2217 {
2218   Mat_MPIBAIJ    *b;
2219   PetscErrorCode ierr;
2220   PetscTruth     flg;
2221 
2222   PetscFunctionBegin;
2223   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2224   B->data = (void*)b;
2225 
2226   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2227   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2228   B->mapping    = 0;
2229   B->factor     = 0;
2230   B->assembled  = PETSC_FALSE;
2231 
2232   B->insertmode = NOT_SET_VALUES;
2233   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2234   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2235 
2236   /* build local table of row and column ownerships */
2237   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2238   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2239   b->cowners    = b->rowners + b->size + 2;
2240   b->rowners_bs = b->cowners + b->size + 2;
2241 
2242   /* build cache for off array entries formed */
2243   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2244   b->donotstash  = PETSC_FALSE;
2245   b->colmap      = PETSC_NULL;
2246   b->garray      = PETSC_NULL;
2247   b->roworiented = PETSC_TRUE;
2248 
2249 #if defined(PETSC_USE_MAT_SINGLE)
2250   /* stuff for MatSetValues_XXX in single precision */
2251   b->setvalueslen     = 0;
2252   b->setvaluescopy    = PETSC_NULL;
2253 #endif
2254 
2255   /* stuff used in block assembly */
2256   b->barray       = 0;
2257 
2258   /* stuff used for matrix vector multiply */
2259   b->lvec         = 0;
2260   b->Mvctx        = 0;
2261 
2262   /* stuff for MatGetRow() */
2263   b->rowindices   = 0;
2264   b->rowvalues    = 0;
2265   b->getrowactive = PETSC_FALSE;
2266 
2267   /* hash table stuff */
2268   b->ht           = 0;
2269   b->hd           = 0;
2270   b->ht_size      = 0;
2271   b->ht_flag      = PETSC_FALSE;
2272   b->ht_fact      = 0;
2273   b->ht_total_ct  = 0;
2274   b->ht_insert_ct = 0;
2275 
2276   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2277   if (flg) {
2278     PetscReal fact = 1.39;
2279     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2280     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2281     if (fact <= 1.0) fact = 1.39;
2282     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2283     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2284   }
2285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2286                                      "MatStoreValues_MPIBAIJ",
2287                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2289                                      "MatRetrieveValues_MPIBAIJ",
2290                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2291   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2292                                      "MatGetDiagonalBlock_MPIBAIJ",
2293                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2294   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2295                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2296                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2297   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2298                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2299                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2300   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2301                                      "MatDiagonalScaleLocal_MPIBAIJ",
2302                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2303   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2304                                      "MatSetHashTableFactor_MPIBAIJ",
2305                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2306   PetscFunctionReturn(0);
2307 }
2308 EXTERN_C_END
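/*
   A minimal sketch of reaching the constructor above from user code
   (sizes illustrative):

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
       ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/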
2309 
2310 /*MC
2311    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2312 
2313    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2314    and MATMPIBAIJ otherwise.
2315 
2316    Options Database Keys:
2317 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2318 
2319   Level: beginner
2320 
2321 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2322 M*/
2323 
2324 EXTERN_C_BEGIN
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatCreate_BAIJ"
2327 PetscErrorCode MatCreate_BAIJ(Mat A)
2328 {
2329   PetscErrorCode ierr;
2330   PetscMPIInt    size;
2331 
2332   PetscFunctionBegin;
2333   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2334   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2335   if (size == 1) {
2336     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2337   } else {
2338     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2339   }
2340   PetscFunctionReturn(0);
2341 }
2342 EXTERN_C_END
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2346 /*@C
2347    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2348    (block compressed row).  For good matrix assembly performance
2349    the user should preallocate the matrix storage by setting the parameters
2350    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2351    performance can be increased by more than a factor of 50.
2352 
2353    Collective on Mat
2354 
2355    Input Parameters:
2356 +  A - the matrix
2357 .  bs   - size of block
2358 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2359            submatrix  (same for all local rows)
2360 .  d_nnz - array containing the number of block nonzeros in the various block rows
2361            in the diagonal portion of the local submatrix (possibly different for each block
2362            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2363 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2364            submatrix (same for all local rows).
2365 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2366            off-diagonal portion of the local submatrix (possibly different for
2367            each block row) or PETSC_NULL.
2368 
2369    If the *_nnz parameter is given then the *_nz parameter is ignored
2370 
2371    Options Database Keys:
2372 .   -mat_no_unroll - uses code that does not unroll the loops in the
2373                      block calculations (much slower)
2374 .   -mat_block_size - size of the blocks to use
2375 
2376    Notes:
2377    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2378    then it must be used on all processors that share the object for that argument.
2379 
2380    Storage Information:
2381    For a square global matrix we define each processor's diagonal portion
2382    to be its local rows and the corresponding columns (a square submatrix);
2383    each processor's off-diagonal portion encompasses the remainder of the
2384    local matrix (a rectangular submatrix).
2385 
2386    The user can specify preallocated storage for the diagonal part of
2387    the local submatrix with either d_nz or d_nnz (not both).  Set
2388    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2389    memory allocation.  Likewise, specify preallocated storage for the
2390    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2391 
2392    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2393    the figure below we depict these three local rows and all columns (0-11).
2394 
2395 .vb
2396            0 1 2 3 4 5 6 7 8 9 10 11
2397           -------------------
2398    row 3  |  o o o d d d o o o o o o
2399    row 4  |  o o o d d d o o o o o o
2400    row 5  |  o o o d d d o o o o o o
2401           -------------------
2402 .ve
2403 
2404    Thus, any entries in the d locations are stored in the d (diagonal)
2405    submatrix, and any entries in the o locations are stored in the
2406    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2407    stored simply in the MATSEQBAIJ format for compressed row storage.
2408 
2409    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2410    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2411    In general, for PDE problems in which most nonzeros are near the diagonal,
2412    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2413    or you will get TERRIBLE performance; see the users' manual chapter on
2414    matrices.
2415 
2416    Level: intermediate
2417 
2418 .keywords: matrix, block, aij, compressed row, sparse, parallel
2419 
2420 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2421 @*/
2422 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2423 {
2424   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2425 
2426   PetscFunctionBegin;
2427   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2428   if (f) {
2429     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2430   }
2431   PetscFunctionReturn(0);
2432 }
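/*
   A minimal usage sketch with illustrative counts, for a matrix with
   three local block rows of block size 2:

       PetscInt d_nnz[3] = {2,3,2};
       PetscInt o_nnz[3] = {1,0,1};
       ierr = MatMPIBAIJSetPreallocation(B,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   or, with uniform per-block-row estimates instead of exact counts:

       ierr = MatMPIBAIJSetPreallocation(B,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
*/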
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatCreateMPIBAIJ"
2436 /*@C
2437    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2438    (block compressed row).  For good matrix assembly performance
2439    the user should preallocate the matrix storage by setting the parameters
2440    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2441    performance can be increased by more than a factor of 50.
2442 
2443    Collective on MPI_Comm
2444 
2445    Input Parameters:
2446 +  comm - MPI communicator
2447 .  bs   - size of block
2448 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2449            This value should be the same as the local size used in creating the
2450            y vector for the matrix-vector product y = Ax.
2451 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2452            This value should be the same as the local size used in creating the
2453            x vector for the matrix-vector product y = Ax.
2454 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2455 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2456 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2457            submatrix  (same for all local rows)
2458 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2459            in the diagonal portion of the local submatrix (possibly different for each block
2460            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2461 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2462            submatrix (same for all local rows).
2463 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2464            off-diagonal portion of the local submatrix (possibly different for
2465            each block row) or PETSC_NULL.
2466 
2467    Output Parameter:
2468 .  A - the matrix
2469 
2470    Options Database Keys:
2471 .   -mat_no_unroll - uses code that does not unroll the loops in the
2472                      block calculations (much slower)
2473 .   -mat_block_size - size of the blocks to use
2474 
2475    Notes:
2476    If the *_nnz parameter is given then the *_nz parameter is ignored
2477 
2478    A nonzero block is any block that has 1 or more nonzeros in it
2479 
2480    The user MUST specify either the local or global matrix dimensions
2481    (possibly both).
2482 
2483    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2484    then it must be used on all processors that share the object for that argument.
2485 
2486    Storage Information:
2487    For a square global matrix we define each processor's diagonal portion
2488    to be its local rows and the corresponding columns (a square submatrix);
2489    each processor's off-diagonal portion encompasses the remainder of the
2490    local matrix (a rectangular submatrix).
2491 
2492    The user can specify preallocated storage for the diagonal part of
2493    the local submatrix with either d_nz or d_nnz (not both).  Set
2494    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2495    memory allocation.  Likewise, specify preallocated storage for the
2496    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2497 
2498    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2499    the figure below we depict these three local rows and all columns (0-11).
2500 
2501 .vb
2502            0 1 2 3 4 5 6 7 8 9 10 11
2503           -------------------
2504    row 3  |  o o o d d d o o o o o o
2505    row 4  |  o o o d d d o o o o o o
2506    row 5  |  o o o d d d o o o o o o
2507           -------------------
2508 .ve
2509 
2510    Thus, any entries in the d locations are stored in the d (diagonal)
2511    submatrix, and any entries in the o locations are stored in the
2512    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2513    stored simply in the MATSEQBAIJ format for compressed row storage.
2514 
2515    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2516    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2517    In general, for PDE problems in which most nonzeros are near the diagonal,
2518    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2519    or you will get TERRIBLE performance; see the users' manual chapter on
2520    matrices.
2521 
2522    Level: intermediate
2523 
2524 .keywords: matrix, block, aij, compressed row, sparse, parallel
2525 
2526 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2527 @*/
2528 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2529 {
2530   PetscErrorCode ierr;
2531   PetscMPIInt    size;
2532 
2533   PetscFunctionBegin;
2534   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2535   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2536   if (size > 1) {
2537     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2538     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2539   } else {
2540     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2541     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2542   }
2543   PetscFunctionReturn(0);
2544 }
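/*
   A minimal usage sketch (sizes illustrative): create a distributed BAIJ
   matrix with block size 2 and 100 global rows and columns, letting PETSc
   choose the local sizes, with 5 diagonal and 2 off-diagonal block
   nonzeros preallocated per block row:

       Mat A;
       ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,
                               100,100,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/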
2545 
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2548 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2549 {
2550   Mat            mat;
2551   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2552   PetscErrorCode ierr;
2553   PetscInt       len=0;
2554 
2555   PetscFunctionBegin;
2556   *newmat       = 0;
2557   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2558   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2559   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2560 
2561   mat->factor       = matin->factor;
2562   mat->preallocated = PETSC_TRUE;
2563   mat->assembled    = PETSC_TRUE;
2564   mat->insertmode   = NOT_SET_VALUES;
2565 
2566   a      = (Mat_MPIBAIJ*)mat->data;
2567   mat->bs  = matin->bs;
2568   a->bs2   = oldmat->bs2;
2569   a->mbs   = oldmat->mbs;
2570   a->nbs   = oldmat->nbs;
2571   a->Mbs   = oldmat->Mbs;
2572   a->Nbs   = oldmat->Nbs;
2573 
2574   a->rstart       = oldmat->rstart;
2575   a->rend         = oldmat->rend;
2576   a->cstart       = oldmat->cstart;
2577   a->cend         = oldmat->cend;
2578   a->size         = oldmat->size;
2579   a->rank         = oldmat->rank;
2580   a->donotstash   = oldmat->donotstash;
2581   a->roworiented  = oldmat->roworiented;
2582   a->rowindices   = 0;
2583   a->rowvalues    = 0;
2584   a->getrowactive = PETSC_FALSE;
2585   a->barray       = 0;
2586   a->rstart_bs    = oldmat->rstart_bs;
2587   a->rend_bs      = oldmat->rend_bs;
2588   a->cstart_bs    = oldmat->cstart_bs;
2589   a->cend_bs      = oldmat->cend_bs;
2590 
2591   /* hash table stuff */
2592   a->ht           = 0;
2593   a->hd           = 0;
2594   a->ht_size      = 0;
2595   a->ht_flag      = oldmat->ht_flag;
2596   a->ht_fact      = oldmat->ht_fact;
2597   a->ht_total_ct  = 0;
2598   a->ht_insert_ct = 0;
2599 
2600   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2601   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2602   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2603   if (oldmat->colmap) {
2604 #if defined (PETSC_USE_CTABLE)
2605   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2606 #else
2607   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2608   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2609   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2610 #endif
2611   } else a->colmap = 0;
2612 
2613   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2614     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2615     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2616     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2617   } else a->garray = 0;
2618 
2619   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2620   PetscLogObjectParent(mat,a->lvec);
2621   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2622   PetscLogObjectParent(mat,a->Mvctx);
2623 
2624   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2625   PetscLogObjectParent(mat,a->A);
2626   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2627   PetscLogObjectParent(mat,a->B);
2628   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2629   *newmat = mat;
2630 
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 #include "petscsys.h"
2635 
2636 #undef __FUNCT__
2637 #define __FUNCT__ "MatLoad_MPIBAIJ"
2638 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2639 {
2640   Mat            A;
2641   PetscErrorCode ierr;
2642   int            fd;
2643   PetscInt       i,nz,j,rstart,rend;
2644   PetscScalar    *vals,*buf;
2645   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2646   MPI_Status     status;
2647   PetscMPIInt    rank,size,maxnz;
2648   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2649   PetscInt       *locrowlens,*procsnz = 0,*browners;
2650   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows;
2651   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2652   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2653   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2654 
2655   PetscFunctionBegin;
2656   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2657 
2658   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2659   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2660   if (!rank) {
2661     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2662     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2663     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2664   }
2665 
2666   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2667   M = header[1]; N = header[2];
2668 
2669   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2670 
2671   /*
2672      This code adds extra rows to make sure the number of rows is
2673      divisible by the blocksize
2674   */
2675   Mbs        = M/bs;
2676   extra_rows = bs - M + bs*Mbs;
2677   if (extra_rows == bs) extra_rows = 0;
2678   else                  Mbs++;
2679   if (extra_rows && !rank) {
2680     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2681   }
2682 
2683   /* determine ownership of all rows */
2684   mbs        = Mbs/size + ((Mbs % size) > rank);
2685   m          = mbs*bs;
2686   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2687   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2688   rowners[0] = 0;
2689   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2690   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2691   rstart = rowners[rank];
2692   rend   = rowners[rank+1];
2693 
2694   /* distribute row lengths to all processors */
2695   ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2696   if (!rank) {
2697     mend = m;
2698     if (size == 1) mend = mend - extra_rows;
2699     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2700     for (j=mend; j<m; j++) locrowlens[j] = 1;
2701     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2702     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2703     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2704     for (j=0; j<m; j++) {
2705       procsnz[0] += locrowlens[j];
2706     }
2707     for (i=1; i<size; i++) {
2708       mend = browners[i+1] - browners[i];
2709       if (i == size-1) mend = mend - extra_rows;
2710       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2711       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2712       /* calculate the number of nonzeros on each processor */
2713       for (j=0; j<browners[i+1]-browners[i]; j++) {
2714         procsnz[i] += rowlengths[j];
2715       }
2716       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2717     }
2718     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2719   } else {
2720     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2721   }
2722 
2723   if (!rank) {
2724     /* determine max buffer needed and allocate it */
2725     maxnz = procsnz[0];
2726     for (i=1; i<size; i++) {
2727       maxnz = PetscMax(maxnz,procsnz[i]);
2728     }
2729     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2730 
2731     /* read in my part of the matrix column indices  */
2732     nz     = procsnz[0];
2733     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2734     mycols = ibuf;
2735     if (size == 1)  nz -= extra_rows;
2736     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2737     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2738 
2739     /* read in the parts for all the other processors (except the last) and ship them off */
2740     for (i=1; i<size-1; i++) {
2741       nz   = procsnz[i];
2742       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2743       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2744     }
2745     /* read in the stuff for the last proc */
2746     if (size != 1) {
2747       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2748       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2749       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2750       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2751     }
2752     ierr = PetscFree(cols);CHKERRQ(ierr);
2753   } else {
2754     /* determine buffer space needed for message */
2755     nz = 0;
2756     for (i=0; i<m; i++) {
2757       nz += locrowlens[i];
2758     }
2759     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2760     mycols = ibuf;
2761     /* receive message of column indices*/
2762     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2763     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2764     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2765   }
2766 
2767   /* loop over local rows, determining number of off diagonal entries */
2768   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2769   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2770   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2771   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2772   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2773   rowcount = 0; nzcount = 0;
2774   for (i=0; i<mbs; i++) {
2775     dcount  = 0;
2776     odcount = 0;
2777     for (j=0; j<bs; j++) {
2778       kmax = locrowlens[rowcount];
2779       for (k=0; k<kmax; k++) {
2780         tmp = mycols[nzcount++]/bs;
2781         if (!mask[tmp]) {
2782           mask[tmp] = 1;
2783           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2784           else masked1[dcount++] = tmp;
2785         }
2786       }
2787       rowcount++;
2788     }
2789 
2790     dlens[i]  = dcount;
2791     odlens[i] = odcount;
2792 
2793     /* zero out the mask elements we set */
2794     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2795     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2796   }
2797 
2798   /* create our matrix */
2799   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2800   ierr = MatSetType(A,type);CHKERRQ(ierr);
2801   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2802 
2803   /* the file stores each row with its column indices sorted, so let insertion take advantage of that */
2804   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2805 
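  /*
     Third pass: process 0 reads the numerical values from the file and ships
     each of the other processors its piece, mirroring the distribution of
     the column indices above; the padded rows get the value 1.0 in their
     single diagonal entry.
  */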
2806   if (!rank) {
2807     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2808     /* read in my part of the matrix numerical values  */
2809     nz = procsnz[0];
2810     vals = buf;
2811     mycols = ibuf;
2812     if (size == 1)  nz -= extra_rows;
2813     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2814     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2815 
2816     /* insert into matrix */
2817     jj      = rstart*bs;
2818     for (i=0; i<m; i++) {
2819       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2820       mycols += locrowlens[i];
2821       vals   += locrowlens[i];
2822       jj++;
2823     }
2824     /* read in other processors (except the last one) and ship out */
2825     for (i=1; i<size-1; i++) {
2826       nz   = procsnz[i];
2827       vals = buf;
2828       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2829       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2830     }
2831     /* the last proc */
2832     if (size != 1){
2833       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2834       vals = buf;
2835       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2836       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2837       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2838     }
2839     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2840   } else {
2841     /* receive numeric values */
2842     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2843 
2844     /* receive the message of values */
2845     vals   = buf;
2846     mycols = ibuf;
2847     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2848     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2849     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"File did not contain the expected number of values");
2850 
2851     /* insert into matrix */
2852     jj      = rstart*bs;
2853     for (i=0; i<m; i++) {
2854       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2855       mycols += locrowlens[i];
2856       vals   += locrowlens[i];
2857       jj++;
2858     }
2859   }
2860   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2861   ierr = PetscFree(buf);CHKERRQ(ierr);
2862   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2863   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2864   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2865   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2866   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2867   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2868 
2869   *newmat = A;
2870   PetscFunctionReturn(0);
2871 }
2872 
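/*
   A minimal usage sketch of the loader above, assuming the PETSc 2.x-era
   MatLoad()/binary-viewer API:

     PetscErrorCode ierr;
     PetscViewer    viewer;
     Mat            A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The block size may be supplied with the -matload_block_size option when it
   is not recorded in the file.
*/
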
2873 #undef __FUNCT__
2874 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2875 /*@
2876    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2877 
2878    Collective on Mat
2879 
2880    Input Parameters:
2881 +  mat  - the matrix
2882 -  fact - the factor used in computing the hash table size
2883 
2884    Level: advanced
2885 
2886    Notes:
2887    This can also be set by the command line option: -mat_use_hash_table <fact>
2888 
2889 .keywords: matrix, hashtable, factor, HT
2890 
2891 .seealso: MatSetOption()
2892 @*/
2893 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2894 {
2895   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2896 
2897   PetscFunctionBegin;
2898   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2899   if (f) {
2900     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2901   }
2902   PetscFunctionReturn(0);
2903 }
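
/*
   A minimal usage sketch, assuming MAT_USE_HASH_TABLE is the MatOption that
   enables the hash-table variant of MatSetValues() for this class:

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);

   or, equivalently, from the command line: -mat_use_hash_table 1.6
*/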
2904 
2905 #undef __FUNCT__
2906 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2907 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2908 {
2909   Mat_MPIBAIJ *baij;
2910 
2911   PetscFunctionBegin;
2912   baij = (Mat_MPIBAIJ*)mat->data;
2913   baij->ht_fact = fact;
2914   PetscFunctionReturn(0);
2915 }
2916 
2917 #undef __FUNCT__
2918 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2919 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2920 {
2921   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2922   PetscFunctionBegin;
2923   *Ad     = a->A;
2924   *Ao     = a->B;
2925   *colmap = a->garray;
2926   PetscFunctionReturn(0);
2927 }
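
/*
   A minimal usage sketch: fetch the two sequential pieces of the parallel
   matrix.  Ad holds the diagonal block, Ao the off-diagonal block, and
   colmap[j] is the global block column corresponding to local block column
   j of Ao.

     Mat            Ad,Ao;
     PetscInt       *colmap;
     PetscErrorCode ierr;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/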
2928