xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 046da0a13659bf6557c8549a2fea89880b2ef309)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist; the ..._MatScalar() names are #defined below to the plain versions. Otherwise
   ..._MatScalar() takes matrix elements already in single precision (MatScalar) and
   inserts them into the single-precision data structure, while MatSetValuesBlocked_MPIBAIJ()
   converts the incoming PetscScalar entries to single precision and then calls
   ..._MatScalar() to store them.
*/
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       i;
44   PetscScalar    *va,*vb;
45   Vec            vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=B->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash-table variant: key = global block column + 1, value = first local
     scalar column of local block i, + 1 (the +1 lets 0 mean "absent") */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: colmap[global block column] = i*bs + 1, 0 means "absent";
     array is sized by the GLOBAL number of block columns (Nbs) - not scalable */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
126 
127 #define CHUNKSIZE  10
128 
/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - inserts (ADD_VALUES or
   INSERT_VALUES) one scalar entry into the diagonal (A) block.  Locates block
   row `brow` and block column `bcol`, updates in place if the block exists,
   otherwise shifts later entries up and inserts, enlarging the storage by
   CHUNKSIZE block entries when the row is full.  Relies on the caller having
   a, ai, aj, aa, aimax, ailen, bs, bs2 and the scratch variables in scope.

   Fix: the PetscMemzero of the freshly opened CHUNKSIZE gap previously used
   sizeof(PetscScalar); the buffer holds MatScalar (see the matching memcpy
   lines and the B-block macro below), so under PETSC_USE_MAT_SINGLE the wrong
   number of bytes was zeroed, overrunning the new allocation.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
205 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - inserts (ADD_VALUES or
   INSERT_VALUES) one scalar entry into the off-diagonal (B) block.  Structure
   mirrors the A-block macro above: binary-then-linear search of block row
   `brow` for block column `bcol`, in-place update if the block exists,
   otherwise shift-and-insert, enlarging storage by CHUNKSIZE block entries
   when the row is full.  Relies on the caller having b, bi, bj, ba, bimax,
   bilen, bs, bs2 and the scratch variables in scope; `value` lands at
   position (row%bs, col%bs) within the bs x bs block.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       i,N = m*n;
290   MatScalar      *vsingle;
291 
292   PetscFunctionBegin;
293   if (N > b->setvalueslen) {
294     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
295     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
296     b->setvalueslen  = N;
297   }
298   vsingle = b->setvaluescopy;
299 
300   for (i=0; i<N; i++) {
301     vsingle[i] = v[i];
302   }
303   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
310 {
311   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
312   PetscErrorCode ierr;
313   PetscInt       i,N = m*n*b->bs2;
314   MatScalar      *vsingle;
315 
316   PetscFunctionBegin;
317   if (N > b->setvalueslen) {
318     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
319     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
320     b->setvalueslen  = N;
321   }
322   vsingle = b->setvaluescopy;
323   for (i=0; i<N; i++) {
324     vsingle[i] = v[i];
325   }
326   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
333 {
334   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
335   PetscErrorCode ierr;
336   PetscInt       i,N = m*n;
337   MatScalar      *vsingle;
338 
339   PetscFunctionBegin;
340   if (N > b->setvalueslen) {
341     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
342     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
343     b->setvalueslen  = N;
344   }
345   vsingle = b->setvaluescopy;
346   for (i=0; i<N; i++) {
347     vsingle[i] = v[i];
348   }
349   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
356 {
357   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
358   PetscErrorCode ierr;
359   PetscInt       i,N = m*n*b->bs2;
360   MatScalar      *vsingle;
361 
362   PetscFunctionBegin;
363   if (N > b->setvalueslen) {
364     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
365     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
366     b->setvalueslen  = N;
367   }
368   vsingle = b->setvaluescopy;
369   for (i=0; i<N; i++) {
370     vsingle[i] = v[i];
371   }
372   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 #endif
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
380 {
381   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
382   MatScalar      value;
383   PetscTruth     roworiented = baij->roworiented;
384   PetscErrorCode ierr;
385   PetscInt       i,j,row,col;
386   PetscInt       rstart_orig=baij->rstart_bs;
387   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
388   PetscInt       cend_orig=baij->cend_bs,bs=baij->bs;
389 
390   /* Some Variables required in the macro */
391   Mat            A = baij->A;
392   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
393   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
394   MatScalar      *aa=a->a;
395 
396   Mat            B = baij->B;
397   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
398   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
399   MatScalar      *ba=b->a;
400 
401   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
402   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
403   MatScalar      *ap,*bap;
404 
405   PetscFunctionBegin;
406   for (i=0; i<m; i++) {
407     if (im[i] < 0) continue;
408 #if defined(PETSC_USE_BOPT_g)
409     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
410 #endif
411     if (im[i] >= rstart_orig && im[i] < rend_orig) {
412       row = im[i] - rstart_orig;
413       for (j=0; j<n; j++) {
414         if (in[j] >= cstart_orig && in[j] < cend_orig){
415           col = in[j] - cstart_orig;
416           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
418           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_BOPT_g)
421         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!baij->colmap) {
426               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430             col  = col - 1;
431 #else
432             col = baij->colmap[in[j]/bs] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
435               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
438               B = baij->B;
439               b = (Mat_SeqBAIJ*)(B)->data;
440               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
441               ba=b->a;
442             } else col += in[j]%bs;
443           } else col = in[j];
444           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
445           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
446           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
447         }
448       }
449     } else {
450       if (!baij->donotstash) {
451         if (roworiented) {
452           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453         } else {
454           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
455         }
456       }
457     }
458   }
459   PetscFunctionReturn(0);
460 }
461 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - Block-indexed insertion for the
   parallel BAIJ matrix.  im[]/in[] are BLOCK row/column indices; v holds
   m*n dense bs x bs blocks.  Each block is gathered (or aliased, when no
   reordering is needed) into baij->barray in the column-major layout the
   sequential kernels expect, then dispatched to the A or B block, or to
   the block stash for off-process rows.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block staging buffer kept on the matrix */
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          /* single column, row-oriented: block i is already contiguous in v */
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          /* single row, column-oriented: block j is already contiguous in v */
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided bs x bs block into the contiguous buffer */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          /* rewind to the start of the staged block */
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          /* off-diagonal block column: translate through colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-process block column: revert to unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* row owned elsewhere: queue whole blocks in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
564 
565 #define HASH_KEY 0.6180339887
566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
569 #undef __FUNCT__
570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
572 {
573   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
574   PetscTruth     roworiented = baij->roworiented;
575   PetscErrorCode ierr;
576   PetscInt       i,j,row,col;
577   PetscInt       rstart_orig=baij->rstart_bs;
578   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
579   PetscInt       h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
580   PetscReal      tmp;
581   MatScalar      **HD = baij->hd,value;
582 #if defined(PETSC_USE_BOPT_g)
583   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
584 #endif
585 
586   PetscFunctionBegin;
587 
588   for (i=0; i<m; i++) {
589 #if defined(PETSC_USE_BOPT_g)
590     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
591     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
592 #endif
593       row = im[i];
594     if (row >= rstart_orig && row < rend_orig) {
595       for (j=0; j<n; j++) {
596         col = in[j];
597         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
598         /* Look up PetscInto the Hash Table */
599         key = (row/bs)*Nbs+(col/bs)+1;
600         h1  = HASH(size,key,tmp);
601 
602 
603         idx = h1;
604 #if defined(PETSC_USE_BOPT_g)
605         insert_ct++;
606         total_ct++;
607         if (HT[idx] != key) {
608           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
609           if (idx == size) {
610             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
611             if (idx == h1) {
612               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
613             }
614           }
615         }
616 #else
617         if (HT[idx] != key) {
618           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
619           if (idx == size) {
620             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
621             if (idx == h1) {
622               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
623             }
624           }
625         }
626 #endif
627         /* A HASH table entry is found, so insert the values at the correct address */
628         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
629         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
630       }
631     } else {
632       if (!baij->donotstash) {
633         if (roworiented) {
634           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
635         } else {
636           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
637         }
638       }
639     }
640   }
641 #if defined(PETSC_USE_BOPT_g)
642   baij->ht_total_ct = total_ct;
643   baij->ht_insert_ct = insert_ct;
644 #endif
645   PetscFunctionReturn(0);
646 }
647 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - Block-indexed insertion via the
   hash table built at assembly time.  im[]/in[] are BLOCK indices; the key
   row*Nbs + col + 1 is hashed and linearly probed (with wraparound), and the
   bs x bs block from v is scattered into the stored block at HD[idx] in
   column-major order.  Only pre-existing nonzero blocks can be updated.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstart ;
  PetscInt        rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up the (block row, block col) key in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* debug build additionally counts probes for hash-quality statistics */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* linear probe from h1 to end, then wrap from 0 back to h1 */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* scatter the row-strided source block into column-major storage */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          /* source is already column-major; copy column by column */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* row owned elsewhere: queue whole blocks in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
764 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves individual (point-wise, not block-wise)
   matrix entries into v (row-major, v[i*n+j] = A(idxm[i],idxn[j])).

   Only rows owned by this process may be queried; an off-process row is an
   error.  A requested column is served from the diagonal block (baij->A)
   when it lies in the local column range, otherwise it is translated via
   the colmap to a local column of the off-diagonal block (baij->B).
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  /* ownership ranges are stored in block units; scale by bs to get point indices */
  PetscInt       bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      /* row is locally owned; convert to a local row index */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the local range: entry lives in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: map global block column to B's local numbering */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap entries are stored shifted by +1 so that 0 means "absent" */
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not present in B's nonzero structure -> the entry is zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            /* data is the local block column scaled by bs; add the point offset */
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatNorm_MPIBAIJ"
812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813 {
814   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
815   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816   PetscErrorCode ierr;
817   PetscInt       i,bs2=baij->bs2;
818   PetscReal      sum = 0.0;
819   MatScalar      *v;
820 
821   PetscFunctionBegin;
822   if (baij->size == 1) {
823     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824   } else {
825     if (type == NORM_FROBENIUS) {
826       v = amat->a;
827       for (i=0; i<amat->nz*bs2; i++) {
828 #if defined(PETSC_USE_COMPLEX)
829         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
830 #else
831         sum += (*v)*(*v); v++;
832 #endif
833       }
834       v = bmat->a;
835       for (i=0; i<bmat->nz*bs2; i++) {
836 #if defined(PETSC_USE_COMPLEX)
837         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
838 #else
839         sum += (*v)*(*v); v++;
840 #endif
841       }
842       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
843       *nrm = sqrt(*nrm);
844     } else {
845       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
846     }
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 
852 /*
853   Creates the hash table, and sets the table
854   This table is created only once.
855   If new entried need to be added to the matrix
856   then the hash table has to be destroyed and
857   recreated.
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
862 {
863   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
864   Mat            A = baij->A,B=baij->B;
865   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
866   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
867   PetscErrorCode ierr;
868   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
869   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
870   PetscInt       *HT,key;
871   MatScalar      **HD;
872   PetscReal      tmp;
873 #if defined(PETSC_USE_BOPT_g)
874   PetscInt       ct=0,max=0;
875 #endif
876 
877   PetscFunctionBegin;
878   baij->ht_size=(PetscInt)(factor*nz);
879   size = baij->ht_size;
880 
881   if (baij->ht) {
882     PetscFunctionReturn(0);
883   }
884 
885   /* Allocate Memory for Hash Table */
886   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
887   baij->ht = (PetscInt*)(baij->hd + size);
888   HD       = baij->hd;
889   HT       = baij->ht;
890 
891 
892   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
893 
894 
895   /* Loop Over A */
896   for (i=0; i<a->mbs; i++) {
897     for (j=ai[i]; j<ai[i+1]; j++) {
898       row = i+rstart;
899       col = aj[j]+cstart;
900 
901       key = row*Nbs + col + 1;
902       h1  = HASH(size,key,tmp);
903       for (k=0; k<size; k++){
904         if (!HT[(h1+k)%size]) {
905           HT[(h1+k)%size] = key;
906           HD[(h1+k)%size] = a->a + j*bs2;
907           break;
908 #if defined(PETSC_USE_BOPT_g)
909         } else {
910           ct++;
911 #endif
912         }
913       }
914 #if defined(PETSC_USE_BOPT_g)
915       if (k> max) max = k;
916 #endif
917     }
918   }
919   /* Loop Over B */
920   for (i=0; i<b->mbs; i++) {
921     for (j=bi[i]; j<bi[i+1]; j++) {
922       row = i+rstart;
923       col = garray[bj[j]];
924       key = row*Nbs + col + 1;
925       h1  = HASH(size,key,tmp);
926       for (k=0; k<size; k++){
927         if (!HT[(h1+k)%size]) {
928           HT[(h1+k)%size] = key;
929           HD[(h1+k)%size] = b->a + j*bs2;
930           break;
931 #if defined(PETSC_USE_BOPT_g)
932         } else {
933           ct++;
934 #endif
935         }
936       }
937 #if defined(PETSC_USE_BOPT_g)
938       if (k> max) max = k;
939 #endif
940     }
941   }
942 
943   /* Print Summary */
944 #if defined(PETSC_USE_BOPT_g)
945   for (i=0,j=0; i<size; i++) {
946     if (HT[i]) {j++;}
947   }
948   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
949 #endif
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
956 {
957   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
958   PetscErrorCode ierr;
959   PetscInt       nstash,reallocs;
960   InsertMode     addv;
961 
962   PetscFunctionBegin;
963   if (baij->donotstash) {
964     PetscFunctionReturn(0);
965   }
966 
967   /* make sure all processors are either in INSERTMODE or ADDMODE */
968   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
969   if (addv == (ADD_VALUES|INSERT_VALUES)) {
970     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
971   }
972   mat->insertmode = addv; /* in case this processor had no cache */
973 
974   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
975   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
976   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
977   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
978   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
979   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
980   PetscFunctionReturn(0);
981 }
982 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: drains the received point
   and block stashes into the local blocks, assembles the two sequential
   blocks, coordinates disassembly across processes, sets up the scatter
   for matrix-vector products on first final assembly, and (if requested)
   builds the hash table for the fast MatSetValues paths.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: receive messages until none are left */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* values arrive as bs2-sized blocks, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff any process disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build colmap/garray and the Mvctx scatter */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* user asked for hash-table-based insertion: build the table now and
     switch the function pointers to the HT variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow is stale after assembly; release it */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
1083 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views the parallel matrix.

   For the ASCII info formats only summary statistics are printed.  For a
   full view on more than one process, the entire matrix is gathered into a
   temporary matrix on process 0 (expensive!) and viewed there through the
   viewer's singleton.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = baij->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process statistics; nz counts are in blocks, so scale by bs */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              baij->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* a null draw context means graphics are disabled: nothing to do */
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      /* process 0 owns every row of the gathered copy */
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global point rows of one block row */
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs*bs block at a time */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray translates B's local block column to the global block column */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatView_MPIBAIJ"
1202 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1203 {
1204   PetscErrorCode ierr;
1205   PetscTruth     iascii,isdraw,issocket,isbinary;
1206 
1207   PetscFunctionBegin;
1208   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1209   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1210   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1211   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1212   if (iascii || isdraw || issocket || isbinary) {
1213     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1214   } else {
1215     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1216   }
1217   PetscFunctionReturn(0);
1218 }
1219 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Releases everything owned by the parallel matrix:
   the stashes, the two sequential blocks, the column map, the scatter
   machinery, the hash table, and finally the composed functions installed
   at creation time.  The NULL guards are required: optional members are
   only allocated on demand.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is either a PetscTable or a plain array depending on the build */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd and ht were allocated as a single chunk, so freeing hd frees both */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  /* unregister the type-specific functions composed onto the object */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatMult_MPIBAIJ"
1263 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1264 {
1265   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1266   PetscErrorCode ierr;
1267   PetscInt       nt;
1268 
1269   PetscFunctionBegin;
1270   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1271   if (nt != A->n) {
1272     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1273   }
1274   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1275   if (nt != A->m) {
1276     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1277   }
1278   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1279   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1280   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1281   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1282   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
/*
   MatMultAdd_MPIBAIJ - Computes zz = yy + A*xx, overlapping the ghost-value
   scatter with the multiply by the local diagonal block.
*/
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* start gathering the off-process values of xx into a->lvec */
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* zz = yy + A_diag*xx while the scatter is in flight */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* zz += A_off*lvec */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1300 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
/*
   MatMultTranspose_MPIBAIJ - Computes yy = A^T*xx.  The off-diagonal
   block's transpose product lands in a->lvec and is scattered back (in
   reverse, with addition) to its owning processes, overlapped with the
   local diagonal-block transpose product.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1321 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
/*
   MatMultTransposeAdd_MPIBAIJ - Computes zz = yy + A^T*xx, using the same
   reverse-scatter overlap scheme as MatMultTranspose_MPIBAIJ.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part: zz = yy + A_diag^T*xx */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1342 
1343 /*
1344   This only works correctly for square matrices where the subblock A->A is the
1345    diagonal block
1346 */
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1349 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1350 {
1351   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1352   PetscErrorCode ierr;
1353 
1354   PetscFunctionBegin;
1355   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1356   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatScale_MPIBAIJ"
1362 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1363 {
1364   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1365   PetscErrorCode ierr;
1366 
1367   PetscFunctionBegin;
1368   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1369   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned row as (point) column
   indices and values, merged from the diagonal (A) and off-diagonal (B)
   blocks in increasing global column order.  The returned arrays point
   into scratch space owned by the matrix; MatRestoreRow must be called
   before the next MatGetRow.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  PetscInt       *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* longest block row counted over both blocks together */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one chunk: values first, then the index array right behind them */
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* pass NULL for whatever the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap (garray) translates B's local block columns to global block columns */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the local range */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* order: B-entries left of the diagonal block, then A, then the rest of B */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined while copying the values above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: find imark now */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1457 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1458 {
1459   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1460 
1461   PetscFunctionBegin;
1462   if (baij->getrowactive == PETSC_FALSE) {
1463     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1464   }
1465   baij->getrowactive = PETSC_FALSE;
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1471 PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,PetscInt *bs)
1472 {
1473   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1474 
1475   PetscFunctionBegin;
1476   *bs = baij->bs;
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1482 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1483 {
1484   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1489   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1495 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1496 {
1497   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1498   Mat            A = a->A,B = a->B;
1499   PetscErrorCode ierr;
1500   PetscReal      isend[5],irecv[5];
1501 
1502   PetscFunctionBegin;
1503   info->block_size     = (PetscReal)a->bs;
1504   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1505   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1506   isend[3] = info->memory;  isend[4] = info->mallocs;
1507   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1508   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1509   isend[3] += info->memory;  isend[4] += info->mallocs;
1510   if (flag == MAT_LOCAL) {
1511     info->nz_used      = isend[0];
1512     info->nz_allocated = isend[1];
1513     info->nz_unneeded  = isend[2];
1514     info->memory       = isend[3];
1515     info->mallocs      = isend[4];
1516   } else if (flag == MAT_GLOBAL_MAX) {
1517     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1518     info->nz_used      = irecv[0];
1519     info->nz_allocated = irecv[1];
1520     info->nz_unneeded  = irecv[2];
1521     info->memory       = irecv[3];
1522     info->mallocs      = irecv[4];
1523   } else if (flag == MAT_GLOBAL_SUM) {
1524     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1525     info->nz_used      = irecv[0];
1526     info->nz_allocated = irecv[1];
1527     info->nz_unneeded  = irecv[2];
1528     info->memory       = irecv[3];
1529     info->mallocs      = irecv[4];
1530   } else {
1531     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1532   }
1533   info->rows_global       = (PetscReal)A->M;
1534   info->columns_global    = (PetscReal)A->N;
1535   info->rows_local        = (PetscReal)A->m;
1536   info->columns_local     = (PetscReal)A->N;
1537   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1538   info->fill_ratio_needed = 0;
1539   info->factor_mallocs    = 0;
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1545 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1546 {
1547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1548   PetscErrorCode ierr;
1549 
1550   PetscFunctionBegin;
1551   switch (op) {
1552   case MAT_NO_NEW_NONZERO_LOCATIONS:
1553   case MAT_YES_NEW_NONZERO_LOCATIONS:
1554   case MAT_COLUMNS_UNSORTED:
1555   case MAT_COLUMNS_SORTED:
1556   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1557   case MAT_KEEP_ZEROED_ROWS:
1558   case MAT_NEW_NONZERO_LOCATION_ERR:
1559     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1560     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1561     break;
1562   case MAT_ROW_ORIENTED:
1563     a->roworiented = PETSC_TRUE;
1564     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1565     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1566     break;
1567   case MAT_ROWS_SORTED:
1568   case MAT_ROWS_UNSORTED:
1569   case MAT_YES_NEW_DIAGONALS:
1570     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1571     break;
1572   case MAT_COLUMN_ORIENTED:
1573     a->roworiented = PETSC_FALSE;
1574     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1575     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1576     break;
1577   case MAT_IGNORE_OFF_PROC_ENTRIES:
1578     a->donotstash = PETSC_TRUE;
1579     break;
1580   case MAT_NO_NEW_DIAGONALS:
1581     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1582   case MAT_USE_HASH_TABLE:
1583     a->ht_flag = PETSC_TRUE;
1584     break;
1585   case MAT_SYMMETRIC:
1586   case MAT_STRUCTURALLY_SYMMETRIC:
1587   case MAT_HERMITIAN:
1588   case MAT_SYMMETRY_ETERNAL:
1589     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1590     break;
1591   case MAT_NOT_SYMMETRIC:
1592   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1593   case MAT_NOT_HERMITIAN:
1594   case MAT_NOT_SYMMETRY_ETERNAL:
1595     break;
1596   default:
1597     SETERRQ(PETSC_ERR_SUP,"unknown option");
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1604 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1605 {
1606   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1607   Mat_SeqBAIJ    *Aloc;
1608   Mat            B;
1609   PetscErrorCode ierr;
1610   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1611   PetscInt       bs=baij->bs,mbs=baij->mbs;
1612   MatScalar      *a;
1613 
1614   PetscFunctionBegin;
1615   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1616   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1617   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1618   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1619 
1620   /* copy over the A part */
1621   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1622   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1623   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1624 
1625   for (i=0; i<mbs; i++) {
1626     rvals[0] = bs*(baij->rstart + i);
1627     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1628     for (j=ai[i]; j<ai[i+1]; j++) {
1629       col = (baij->cstart+aj[j])*bs;
1630       for (k=0; k<bs; k++) {
1631         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1632         col++; a += bs;
1633       }
1634     }
1635   }
1636   /* copy over the B part */
1637   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1638   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1639   for (i=0; i<mbs; i++) {
1640     rvals[0] = bs*(baij->rstart + i);
1641     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1642     for (j=ai[i]; j<ai[i+1]; j++) {
1643       col = baij->garray[aj[j]]*bs;
1644       for (k=0; k<bs; k++) {
1645         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1646         col++; a += bs;
1647       }
1648     }
1649   }
1650   ierr = PetscFree(rvals);CHKERRQ(ierr);
1651   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1652   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1653 
1654   if (matout) {
1655     *matout = B;
1656   } else {
1657     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1658   }
1659   PetscFunctionReturn(0);
1660 }
1661 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll)*mat*diag(rr).

   Either ll or rr may be PETSC_NULL, in which case that side is skipped.
   The local size of ll must match the local row size of mat, and the local
   size of rr the local column size.

   Right-scaling the off-diagonal block needs the ghost entries of rr, so
   the scatter gathering them into baij->lvec is started first and completed
   only after the diagonal block is scaled; the VecScatterBegin/End pair
   must not be reordered relative to the diagonal-block scaling.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;  /* local diagonal / off-diagonal blocks */
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling uses only locally owned entries, so do it immediately */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1695 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (global, point-indexed) rows listed in is,
   optionally placing *diag on the diagonal of each zeroed row.

   The index set may contain rows owned by other processes, so each process
   first routes every requested index to its owner via nonblocking MPI
   point-to-point messages, then zaps the rows it received locally.

   Collective on Mat: every process in the communicator must participate,
   even with an empty index set.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = #indices destined for process j; nprocs[2*j+1] = 0/1 flag
     marking whether any message to j is needed */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* owners[] holds block-row ranges; scale by bs to compare point indices */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  /* one fixed-size (nmax) slot per expected message, received from any source */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* bucket the indices into svalues by destination; starts[] is consumed here */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (it was advanced above) and fire the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first point row owned by this process (block row * bs) */
  base = owners[rank]*bs;

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* collect all received global indices into lrows[], shifted to local numbering */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: the diagonal entries may live off-process,
       so they must be inserted via MatSetValues() and reassembled */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  /* svalues must stay alive until all sends complete */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1848 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1849 {
1850   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1851   MPI_Comm          comm = A->comm;
1852   static PetscTruth called = PETSC_FALSE;
1853   PetscErrorCode    ierr;
1854 
1855   PetscFunctionBegin;
1856   if (!a->rank) {
1857     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1858   }
1859   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1860   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1861   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1867 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1868 {
1869   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1870   PetscErrorCode ierr;
1871 
1872   PetscFunctionBegin;
1873   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatEqual_MPIBAIJ"
1881 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1882 {
1883   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1884   Mat            a,b,c,d;
1885   PetscTruth     flg;
1886   PetscErrorCode ierr;
1887 
1888   PetscFunctionBegin;
1889   a = matA->A; b = matA->B;
1890   c = matB->A; d = matB->B;
1891 
1892   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1893   if (flg == PETSC_TRUE) {
1894     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1895   }
1896   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1903 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1904 {
1905   PetscErrorCode ierr;
1906 
1907   PetscFunctionBegin;
1908   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
/* -------------------------------------------------------------------*/
/*
   Function table for the MPIBAIJ matrix type.  Entries are positional,
   matching the slot order of struct _MatOps (the numbered comments mark
   every fifth slot); a 0 entry means that operation is not provided by
   this format.  Do not reorder.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       0,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*25*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       0,
/*45*/ MatPrintHelp_MPIBAIJ,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPIBAIJ,
       0,
       MatSetValuesBlocked_MPIBAIJ,
/*60*/ 0,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPIBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};
2013 
2014 
2015 EXTERN_C_BEGIN
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2018 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2019 {
2020   PetscFunctionBegin;
2021   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2022   *iscopy = PETSC_FALSE;
2023   PetscFunctionReturn(0);
2024 }
2025 EXTERN_C_END
2026 
2027 EXTERN_C_BEGIN
2028 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2029 EXTERN_C_END
2030 
2031 EXTERN_C_BEGIN
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2034 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2035 {
2036   Mat_MPIBAIJ    *b;
2037   PetscErrorCode ierr;
2038   PetscInt       i;
2039 
2040   PetscFunctionBegin;
2041   B->preallocated = PETSC_TRUE;
2042   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2043 
2044   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2045   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2046   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2047   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2048   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2049   if (d_nnz) {
2050   for (i=0; i<B->m/bs; i++) {
2051       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2052     }
2053   }
2054   if (o_nnz) {
2055     for (i=0; i<B->m/bs; i++) {
2056       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2057     }
2058   }
2059 
2060   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2061   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2062   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2063   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2064 
2065   b = (Mat_MPIBAIJ*)B->data;
2066   b->bs  = bs;
2067   b->bs2 = bs*bs;
2068   b->mbs = B->m/bs;
2069   b->nbs = B->n/bs;
2070   b->Mbs = B->M/bs;
2071   b->Nbs = B->N/bs;
2072 
2073   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2074   b->rowners[0]    = 0;
2075   for (i=2; i<=b->size; i++) {
2076     b->rowners[i] += b->rowners[i-1];
2077   }
2078   b->rstart    = b->rowners[b->rank];
2079   b->rend      = b->rowners[b->rank+1];
2080 
2081   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2082   b->cowners[0] = 0;
2083   for (i=2; i<=b->size; i++) {
2084     b->cowners[i] += b->cowners[i-1];
2085   }
2086   b->cstart    = b->cowners[b->rank];
2087   b->cend      = b->cowners[b->rank+1];
2088 
2089   for (i=0; i<=b->size; i++) {
2090     b->rowners_bs[i] = b->rowners[i]*bs;
2091   }
2092   b->rstart_bs = b->rstart*bs;
2093   b->rend_bs   = b->rend*bs;
2094   b->cstart_bs = b->cstart*bs;
2095   b->cend_bs   = b->cend*bs;
2096 
2097   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2098   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2099   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2100   PetscLogObjectParent(B,b->A);
2101   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2102   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2103   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2104   PetscLogObjectParent(B,b->B);
2105 
2106   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2107 
2108   PetscFunctionReturn(0);
2109 }
2110 EXTERN_C_END
2111 
2112 EXTERN_C_BEGIN
2113 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2114 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2115 EXTERN_C_END
2116 
2117 /*MC
2118    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2119 
2120    Options Database Keys:
2121 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2122 
2123   Level: beginner
2124 
2125 .seealso: MatCreateMPIBAIJ
2126 M*/
2127 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Constructor for the MPIBAIJ matrix type.  Allocates
   and zeros the type-specific data, installs the function table, builds the
   ownership tables and stash, initializes the hash-table assembly options,
   and composes the type-specific dynamic functions on the object.  The
   actual preallocation happens later in MatMPIBAIJSetPreallocation_MPIBAIJ().
*/
PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  /* install the MPIBAIJ function table defined above */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation carved into three arrays: rowners, cowners, rowners_bs */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* -mat_use_hash_table [factor] enables hash-table based assembly;
     a factor <= 1.0 falls back to the default 1.39 */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* compose the type-specific operations reachable via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2221 
2222 /*MC
2223    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2224 
2225    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2226    and MATMPIBAIJ otherwise.
2227 
2228    Options Database Keys:
2229 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2230 
2231   Level: beginner
2232 
2233 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2234 M*/
2235 
2236 EXTERN_C_BEGIN
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatCreate_BAIJ"
2239 PetscErrorCode MatCreate_BAIJ(Mat A)
2240 {
2241   PetscErrorCode ierr;
2242   PetscMPIInt    size;
2243 
2244   PetscFunctionBegin;
2245   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2246   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2247   if (size == 1) {
2248     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2249   } else {
2250     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2251   }
2252   PetscFunctionReturn(0);
2253 }
2254 EXTERN_C_END
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2258 /*@C
2259    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2260    (block compressed row).  For good matrix assembly performance
2261    the user should preallocate the matrix storage by setting the parameters
2262    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2263    performance can be increased by more than a factor of 50.
2264 
2265    Collective on Mat
2266 
2267    Input Parameters:
2268 +  A - the matrix
2269 .  bs   - size of blockk
2270 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2271            submatrix  (same for all local rows)
2272 .  d_nnz - array containing the number of block nonzeros in the various block rows
2273            of the in diagonal portion of the local (possibly different for each block
2274            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2275 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2276            submatrix (same for all local rows).
2277 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2278            off-diagonal portion of the local submatrix (possibly different for
2279            each block row) or PETSC_NULL.
2280 
2281    Output Parameter:
2282 
2283 
2284    Options Database Keys:
2285 .   -mat_no_unroll - uses code that does not unroll the loops in the
2286                      block calculations (much slower)
2287 .   -mat_block_size - size of the blocks to use
2288 
2289    Notes:
2290    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2291    than it must be used on all processors that share the object for that argument.
2292 
2293    Storage Information:
2294    For a square global matrix we define each processor's diagonal portion
2295    to be its local rows and the corresponding columns (a square submatrix);
2296    each processor's off-diagonal portion encompasses the remainder of the
2297    local matrix (a rectangular submatrix).
2298 
2299    The user can specify preallocated storage for the diagonal part of
2300    the local submatrix with either d_nz or d_nnz (not both).  Set
2301    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2302    memory allocation.  Likewise, specify preallocated storage for the
2303    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2304 
2305    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2306    the figure below we depict these three local rows and all columns (0-11).
2307 
2308 .vb
2309            0 1 2 3 4 5 6 7 8 9 10 11
2310           -------------------
2311    row 3  |  o o o d d d o o o o o o
2312    row 4  |  o o o d d d o o o o o o
2313    row 5  |  o o o d d d o o o o o o
2314           -------------------
2315 .ve
2316 
2317    Thus, any entries in the d locations are stored in the d (diagonal)
2318    submatrix, and any entries in the o locations are stored in the
2319    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2320    stored simply in the MATSEQBAIJ format for compressed row storage.
2321 
2322    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2323    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2324    In general, for PDE problems in which most nonzeros are near the diagonal,
2325    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2326    or you will get TERRIBLE performance; see the users' manual chapter on
2327    matrices.
2328 
2329    Level: intermediate
2330 
2331 .keywords: matrix, block, aij, compressed row, sparse, parallel
2332 
2333 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2334 @*/
2335 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2336 {
2337   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2338 
2339   PetscFunctionBegin;
2340   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2341   if (f) {
2342     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2343   }
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatCreateMPIBAIJ"
2349 /*@C
2350    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2351    (block compressed row).  For good matrix assembly performance
2352    the user should preallocate the matrix storage by setting the parameters
2353    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2354    performance can be increased by more than a factor of 50.
2355 
2356    Collective on MPI_Comm
2357 
2358    Input Parameters:
2359 +  comm - MPI communicator
2360 .  bs   - size of block
2361 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2362            This value should be the same as the local size used in creating the
2363            y vector for the matrix-vector product y = Ax.
2364 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2365            This value should be the same as the local size used in creating the
2366            x vector for the matrix-vector product y = Ax.
2367 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2368 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2369 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2370            submatrix  (same for all local rows)
2371 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2372            of the in diagonal portion of the local (possibly different for each block
2373            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2374 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2375            submatrix (same for all local rows).
2376 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2377            off-diagonal portion of the local submatrix (possibly different for
2378            each block row) or PETSC_NULL.
2379 
2380    Output Parameter:
2381 .  A - the matrix
2382 
2383    Options Database Keys:
2384 .   -mat_no_unroll - uses code that does not unroll the loops in the
2385                      block calculations (much slower)
2386 .   -mat_block_size - size of the blocks to use
2387 
2388    Notes:
2389    A nonzero block is any block that has 1 or more nonzeros in it
2390 
2391    The user MUST specify either the local or global matrix dimensions
2392    (possibly both).
2393 
2394    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2395    then it must be used on all processors that share the object for that argument.
2396 
2397    Storage Information:
2398    For a square global matrix we define each processor's diagonal portion
2399    to be its local rows and the corresponding columns (a square submatrix);
2400    each processor's off-diagonal portion encompasses the remainder of the
2401    local matrix (a rectangular submatrix).
2402 
2403    The user can specify preallocated storage for the diagonal part of
2404    the local submatrix with either d_nz or d_nnz (not both).  Set
2405    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2406    memory allocation.  Likewise, specify preallocated storage for the
2407    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2408 
2409    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2410    the figure below we depict these three local rows and all columns (0-11).
2411 
2412 .vb
2413            0 1 2 3 4 5 6 7 8 9 10 11
2414           -------------------
2415    row 3  |  o o o d d d o o o o o o
2416    row 4  |  o o o d d d o o o o o o
2417    row 5  |  o o o d d d o o o o o o
2418           -------------------
2419 .ve
2420 
2421    Thus, any entries in the d locations are stored in the d (diagonal)
2422    submatrix, and any entries in the o locations are stored in the
2423    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2424    stored simply in the MATSEQBAIJ format for compressed row storage.
2425 
2426    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2427    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2428    In general, for PDE problems in which most nonzeros are near the diagonal,
2429    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2430    or you will get TERRIBLE performance; see the users' manual chapter on
2431    matrices.
2432 
2433    Level: intermediate
2434 
2435 .keywords: matrix, block, aij, compressed row, sparse, parallel
2436 
2437 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2438 @*/
2439 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2440 {
2441   PetscErrorCode ierr;
2442   PetscMPIInt    size;
2443 
2444   PetscFunctionBegin;
2445   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2446   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2447   if (size > 1) {
2448     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2449     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2450   } else {
2451     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2452     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2453   }
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates a parallel BAIJ matrix: creates a new
   matrix of the same type and layout, copies all MPIBAIJ bookkeeping state
   field by field, and duplicates the diagonal (A) and off-diagonal (B)
   sequential blocks (values copied or not according to cpvalues).
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;   /* number of block columns in B's column map (garray) */

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  /* copy the whole function table so any ops overridden on matin carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* block geometry: block size, squared block size, local/global block counts */
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* ownership ranges in block units, MPI layout, and per-matrix flags;
     row-access scratch state (rowindices/rowvalues/barray) starts empty */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  /* ownership ranges in scalar (point) units */
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff: keep the configuration (flag/factor) but not the
     table itself or its statistics; it is rebuilt on demand */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* copy the three ownership arrays (rowners and friends are stored
     contiguously, size+2 entries each) and create fresh stashes */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  /* duplicate the global-to-local column map of the off-diagonal part, if any */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* duplicate the local-to-global column map of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(PetscInt));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  /* duplicate the communication machinery for matrix-vector products */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);

  /* duplicate the diagonal and off-diagonal sequential blocks */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
2544 
2545 #include "petscsys.h"
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatLoad_MPIBAIJ"
2549 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2550 {
2551   Mat            A;
2552   PetscErrorCode ierr;
2553   int            fd;
2554   PetscInt       i,nz,j,rstart,rend;
2555   PetscScalar    *vals,*buf;
2556   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2557   MPI_Status     status;
2558   PetscMPIInt    rank,size,maxnz;
2559   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2560   PetscMPIInt    *locrowlens,*sndcounts = 0,*procsnz = 0,*browners;
2561   PetscInt       jj,*mycols,*ibuf;
2562   PetscInt       tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2563   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2564   PetscInt       dcount,kmax,k,nzcount,tmp;
2565 
2566   PetscFunctionBegin;
2567   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2568 
2569   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2570   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2571   if (!rank) {
2572     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2573     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2574     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2575     if (header[3] < 0) {
2576       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2577     }
2578   }
2579 
2580   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2581   M = header[1]; N = header[2];
2582 
2583   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2584 
2585   /*
2586      This code adds extra rows to make sure the number of rows is
2587      divisible by the blocksize
2588   */
2589   Mbs        = M/bs;
2590   extra_rows = bs - M + bs*(Mbs);
2591   if (extra_rows == bs) extra_rows = 0;
2592   else                  Mbs++;
2593   if (extra_rows &&!rank) {
2594     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2595   }
2596 
2597   /* determine ownership of all rows */
2598   mbs        = Mbs/size + ((Mbs % size) > rank);
2599   m          = mbs*bs;
2600   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2601   browners   = (PetscMPIInt*)rowners + size + 1;
2602   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2603   rowners[0] = 0;
2604   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2605   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2606   rstart = rowners[rank];
2607   rend   = rowners[rank+1];
2608 
2609   /* distribute row lengths to all processors */
2610   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2611   if (!rank) {
2612     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2613     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2614     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2615     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2616     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2617     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2618     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2619   } else {
2620     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2621   }
2622 
2623   if (!rank) {
2624     /* calculate the number of nonzeros on each processor */
2625     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2626     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2627     for (i=0; i<size; i++) {
2628       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2629         procsnz[i] += rowlengths[j];
2630       }
2631     }
2632     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2633 
2634     /* determine max buffer needed and allocate it */
2635     maxnz = 0;
2636     for (i=0; i<size; i++) {
2637       maxnz = PetscMax(maxnz,procsnz[i]);
2638     }
2639     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2640 
2641     /* read in my part of the matrix column indices  */
2642     nz     = procsnz[0];
2643     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2644     mycols = ibuf;
2645     if (size == 1)  nz -= extra_rows;
2646     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2647     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2648 
2649     /* read in every ones (except the last) and ship off */
2650     for (i=1; i<size-1; i++) {
2651       nz   = procsnz[i];
2652       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2653       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2654     }
2655     /* read in the stuff for the last proc */
2656     if (size != 1) {
2657       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2658       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2659       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2660       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2661     }
2662     ierr = PetscFree(cols);CHKERRQ(ierr);
2663   } else {
2664     /* determine buffer space needed for message */
2665     nz = 0;
2666     for (i=0; i<m; i++) {
2667       nz += locrowlens[i];
2668     }
2669     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2670     mycols = ibuf;
2671     /* receive message of column indices*/
2672     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2673     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2674     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2675   }
2676 
2677   /* loop over local rows, determining number of off diagonal entries */
2678   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2679   odlens   = dlens + (rend-rstart);
2680   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2681   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2682   masked1  = mask    + Mbs;
2683   masked2  = masked1 + Mbs;
2684   rowcount = 0; nzcount = 0;
2685   for (i=0; i<mbs; i++) {
2686     dcount  = 0;
2687     odcount = 0;
2688     for (j=0; j<bs; j++) {
2689       kmax = locrowlens[rowcount];
2690       for (k=0; k<kmax; k++) {
2691         tmp = mycols[nzcount++]/bs;
2692         if (!mask[tmp]) {
2693           mask[tmp] = 1;
2694           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2695           else masked1[dcount++] = tmp;
2696         }
2697       }
2698       rowcount++;
2699     }
2700 
2701     dlens[i]  = dcount;
2702     odlens[i] = odcount;
2703 
2704     /* zero out the mask elements we set */
2705     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2706     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2707   }
2708 
2709   /* create our matrix */
2710   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2711   ierr = MatSetType(A,type);CHKERRQ(ierr)
2712   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2713 
2714   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2715   MatSetOption(A,MAT_COLUMNS_SORTED);
2716 
2717   if (!rank) {
2718     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2719     /* read in my part of the matrix numerical values  */
2720     nz = procsnz[0];
2721     vals = buf;
2722     mycols = ibuf;
2723     if (size == 1)  nz -= extra_rows;
2724     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2725     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2726 
2727     /* insert into matrix */
2728     jj      = rstart*bs;
2729     for (i=0; i<m; i++) {
2730       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2731       mycols += locrowlens[i];
2732       vals   += locrowlens[i];
2733       jj++;
2734     }
2735     /* read in other processors (except the last one) and ship out */
2736     for (i=1; i<size-1; i++) {
2737       nz   = procsnz[i];
2738       vals = buf;
2739       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2740       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2741     }
2742     /* the last proc */
2743     if (size != 1){
2744       nz   = procsnz[i] - extra_rows;
2745       vals = buf;
2746       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2747       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2748       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2749     }
2750     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2751   } else {
2752     /* receive numeric values */
2753     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2754 
2755     /* receive message of values*/
2756     vals   = buf;
2757     mycols = ibuf;
2758     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2759     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2760     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2761 
2762     /* insert into matrix */
2763     jj      = rstart*bs;
2764     for (i=0; i<m; i++) {
2765       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2766       mycols += locrowlens[i];
2767       vals   += locrowlens[i];
2768       jj++;
2769     }
2770   }
2771   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2772   ierr = PetscFree(buf);CHKERRQ(ierr);
2773   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2774   ierr = PetscFree(rowners);CHKERRQ(ierr);
2775   ierr = PetscFree(dlens);CHKERRQ(ierr);
2776   ierr = PetscFree(mask);CHKERRQ(ierr);
2777   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2778   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2779 
2780   *newmat = A;
2781   PetscFunctionReturn(0);
2782 }
2783 
2784 #undef __FUNCT__
2785 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2786 /*@
2787    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2788 
2789    Input Parameters:
2790 .  mat  - the matrix
2791 .  fact - factor
2792 
2793    Collective on Mat
2794 
2795    Level: advanced
2796 
2797   Notes:
2798    This can also be set by the command line option: -mat_use_hash_table fact
2799 
2800 .keywords: matrix, hashtable, factor, HT
2801 
2802 .seealso: MatSetOption()
2803 @*/
2804 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2805 {
2806   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2807 
2808   PetscFunctionBegin;
2809   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2810   if (f) {
2811     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2812   }
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2818 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2819 {
2820   Mat_MPIBAIJ *baij;
2821 
2822   PetscFunctionBegin;
2823   baij = (Mat_MPIBAIJ*)mat->data;
2824   baij->ht_fact = fact;
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2830 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2831 {
2832   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2833   PetscFunctionBegin;
2834   *Ad     = a->A;
2835   *Ao     = a->B;
2836   *colmap = a->garray;
2837   PetscFunctionReturn(0);
2838 }
2839