xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 41ede01b5d5578d8a64bd3d3c354925472bf78b0)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.159 1999/03/11 23:21:46 balay Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;  /* off-diagonal block of the local matrix */
  int         nbs = B->nbs,i,bs=B->bs;            /* nbs = number of occupied block columns in B */
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* Hash-table variant: stores only the nbs occupied block columns.
     Both key (global block column) and value (local scalar column) are
     shifted by +1 so that 0 can be used to mean "not present". */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: colmap[global block column] = first local scalar column
     of that block + 1; entries left zero mean "column not present in B".
     Length is Nbs (global number of block columns) -- the non-scalable part. */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  /* garray[i] is the global block column of B's i-th local block column */
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
50 
/* Number of extra nonzero block slots added to a row each time it overflows */
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private - macro-inlined insertion of ONE scalar
   value at scalar position (row,col) into the "diagonal" sequential block
   matrix a (= baij->A of the caller).

   Expects these locals of the expansion site to be bound to a's storage:
   aa/ai/aj/aimax/ailen (values, row starts, block columns, row capacities,
   row lengths), bs/bs2 (block size and bs*bs), and scratch variables
   brow,bcol,ridx,cidx,rp,ap,bap,low,high,t,_i,ii,N,nrow,rmax,len.

   Algorithm: binary-search the sorted block-column list of block row
   row/bs down to a window of <=3, then scan linearly.  If the block
   exists, write/add into it; otherwise insert a new block, reallocating
   the whole matrix storage by CHUNKSIZE extra blocks when the row is
   full (a->nonew = 1 silently skips new entries, -1/-2 raise errors).
   NOTE(review): a->nz is incremented only inside the reallocation branch,
   not for every newly inserted block -- looks suspicious; confirm whether
   a->nz is recomputed during assembly before relying on it.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
127 
/*
   MatSetValues_SeqBAIJ_B_Private - identical to MatSetValues_SeqBAIJ_A_Private
   except that it operates on the "off-diagonal" sequential block matrix b
   (= baij->B of the caller) through the expansion-site locals
   ba/bi/bj/bimax/bilen and uses the b_noinsert label.  See the A variant
   above for the full description of the algorithm and required locals.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
202 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - inserts (INSERT_VALUES) or accumulates (ADD_VALUES)
   the m x n dense array v of scalar values at global scalar rows im[] and
   columns in[] of the parallel block matrix.

   Locally owned rows are routed entry-by-entry to the diagonal block
   baij->A or the off-diagonal block baij->B via the inlined macros above
   (which read the a*/b* locals declared below); rows owned by other
   processors are stashed for exchange during assembly.  Negative row or
   column indices are silently ignored.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  /* ownership range in scalar (not block) indices */
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;  /* negative rows are ignored by convention */
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* this processor owns the row */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column lies in the off-diagonal block B; after assembly B uses
             a compressed column numbering, so translate via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not previously present: abandon the compressed
                 numbering and fall back to global column indices */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-processor row: stash values for communication at assembly time */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          /* column-oriented input: stash one value at a time */
          row = im[i];
	  for ( j=0; j<n; j++ ) {
	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
288 
289 #undef __FUNC__
290 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
291 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
292 {
293   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
294   Scalar      *value,*barray=baij->barray;
295   int         ierr,i,j,ii,jj,row,col;
296   int         roworiented = baij->roworiented,rstart=baij->rstart ;
297   int         rend=baij->rend,cstart=baij->cstart,stepval;
298   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
299 
300   if(!barray) {
301     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
302   }
303 
304   if (roworiented) {
305     stepval = (n-1)*bs;
306   } else {
307     stepval = (m-1)*bs;
308   }
309   for ( i=0; i<m; i++ ) {
310     if (im[i] < 0) continue;
311 #if defined(USE_PETSC_BOPT_g)
312     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
313 #endif
314     if (im[i] >= rstart && im[i] < rend) {
315       row = im[i] - rstart;
316       for ( j=0; j<n; j++ ) {
317         /* If NumCol = 1 then a copy is not required */
318         if ((roworiented) && (n == 1)) {
319           barray = v + i*bs2;
320         } else if((!roworiented) && (m == 1)) {
321           barray = v + j*bs2;
322         } else { /* Here a copy is required */
323           if (roworiented) {
324             value = v + i*(stepval+bs)*bs + j*bs;
325           } else {
326             value = v + j*(stepval+bs)*bs + i*bs;
327           }
328           for ( ii=0; ii<bs; ii++,value+=stepval ) {
329             for (jj=0; jj<bs; jj++ ) {
330               *barray++  = *value++;
331             }
332           }
333           barray -=bs2;
334         }
335 
336         if (in[j] >= cstart && in[j] < cend){
337           col  = in[j] - cstart;
338           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
339         }
340         else if (in[j] < 0) continue;
341 #if defined(USE_PETSC_BOPT_g)
342         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
343 #endif
344         else {
345           if (mat->was_assembled) {
346             if (!baij->colmap) {
347               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
348             }
349 
350 #if defined(USE_PETSC_BOPT_g)
351 #if defined (USE_CTABLE)
352             { int data;
353             ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
354             if((data - 1) % bs)
355 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
356             }
357 #else
358             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
359 #endif
360 #endif
361 #if defined (USE_CTABLE)
362 	    ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
363             col  = (col - 1)/bs;
364 #else
365             col = (baij->colmap[in[j]] - 1)/bs;
366 #endif
367             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
368               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
369               col =  in[j];
370             }
371           }
372           else col = in[j];
373           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
374         }
375       }
376     } else {
377       if (!baij->donotstash) {
378         /*        ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v+i*bs2*n,
379                                           roworiented,stepval,addv);CHKERRQ(ierr); */
380         ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v,m,n,
381                                           roworiented,i);CHKERRQ(ierr);
382       }
383     }
384   }
385   PetscFunctionReturn(0);
386 }
/* Multiplicative hashing constant: fractional part of the golden ratio */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* HASH maps key into [0,size); tmp must be a double scratch variable.
   (tmp-(int)tmp) extracts the fractional part of key*HASH_KEY, avoiding
   a call to fmod() -- see the commented-out alternative below. */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - scalar-indexed MatSetValues for the case where
   the nonzero pattern is fixed and a hash table (built by
   MatCreateHashTable_MPIBAIJ_Private) maps each (block row, block column)
   to the address of its bs x bs value block.  New nonzeros cannot be
   introduced on this path; a missing table entry is an error.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: keys are built from BLOCK indices,
           shifted by +1 so that 0 means "empty slot" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build additionally counts probes for the assembly summary */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address;
           HD[idx] points at the column-major bs x bs value block */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-processor row: stash for assembly, same as the non-HT path */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
	  for ( j=0; j<n; j++ ) {
	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
470 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - block-indexed MatSetValues for the
   fixed-nonzero-pattern case: each (block row, block column) is located
   through the hash table and the bs x bs block in v is copied/added
   directly into the matrix storage (column-major within the block).
   New nonzeros cannot be introduced; a missing table entry is an error.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;  /* used only on the roworiented fast path below */
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table; key shifted +1 so 0 = empty slot */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build additionally counts probes for the assembly summary */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];  /* start of the column-major bs x bs value block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* v is row-major within the block: transpose while copying
             (jj strides by bs through the column-major destination) */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          /* column-oriented input: columns are already contiguous */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-processor block row: stash scalar-by-scalar in the scalar stash */
      if (!baij->donotstash) {
        if (roworiented ) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
600 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - retrieves the m x n array of scalar values at
   global scalar rows idxm[] and columns idxn[] into v (row-major).
   Only locally owned rows may be requested; columns not stored locally
   return 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  /* ownership ranges converted from block to scalar indices */
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          /* column lies in the off-diagonal block B: translate the global
             column to B's compressed numbering via colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;  /* colmap entries are stored shifted by +1; -1 => absent */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent block column (or stale colmap slot) reads as 0.0 */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
644 
645 #undef __FUNC__
646 #define __FUNC__ "MatNorm_MPIBAIJ"
647 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
648 {
649   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
650   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
651   int        ierr, i,bs2=baij->bs2;
652   double     sum = 0.0;
653   Scalar     *v;
654 
655   PetscFunctionBegin;
656   if (baij->size == 1) {
657     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
658   } else {
659     if (type == NORM_FROBENIUS) {
660       v = amat->a;
661       for (i=0; i<amat->nz*bs2; i++ ) {
662 #if defined(USE_PETSC_COMPLEX)
663         sum += PetscReal(PetscConj(*v)*(*v)); v++;
664 #else
665         sum += (*v)*(*v); v++;
666 #endif
667       }
668       v = bmat->a;
669       for (i=0; i<bmat->nz*bs2; i++ ) {
670 #if defined(USE_PETSC_COMPLEX)
671         sum += PetscReal(PetscConj(*v)*(*v)); v++;
672 #else
673         sum += (*v)*(*v); v++;
674 #endif
675       }
676       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
677       *norm = sqrt(*norm);
678     } else {
679       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
680     }
681   }
682   PetscFunctionReturn(0);
683 }
684 
685 
686 /*
687   Creates the hash table, and sets the table
688   This table is created only once.
689   If new entried need to be added to the matrix
690   then the hash table has to be destroyed and
691   recreated.
692 */
693 #undef __FUNC__
694 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
695 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
696 {
697   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
698   Mat         A = baij->A, B=baij->B;
699   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
700   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
701   int         size,bs2=baij->bs2,rstart=baij->rstart;
702   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
703   int         *HT,key;
704   Scalar      **HD;
705   double      tmp;
706 #if defined(USE_PETSC_BOPT_g)
707   int         ct=0,max=0;
708 #endif
709 
710   PetscFunctionBegin;
711   baij->ht_size=(int)(factor*nz);
712   size = baij->ht_size;
713 
714   if (baij->ht) {
715     PetscFunctionReturn(0);
716   }
717 
718   /* Allocate Memory for Hash Table */
719   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
720   baij->ht = (int*)(baij->hd + size);
721   HD = baij->hd;
722   HT = baij->ht;
723 
724 
725   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
726 
727 
728   /* Loop Over A */
729   for ( i=0; i<a->mbs; i++ ) {
730     for ( j=ai[i]; j<ai[i+1]; j++ ) {
731       row = i+rstart;
732       col = aj[j]+cstart;
733 
734       key = row*Nbs + col + 1;
735       h1  = HASH(size,key,tmp);
736       for ( k=0; k<size; k++ ){
737         if (HT[(h1+k)%size] == 0.0) {
738           HT[(h1+k)%size] = key;
739           HD[(h1+k)%size] = a->a + j*bs2;
740           break;
741 #if defined(USE_PETSC_BOPT_g)
742         } else {
743           ct++;
744 #endif
745         }
746       }
747 #if defined(USE_PETSC_BOPT_g)
748       if (k> max) max = k;
749 #endif
750     }
751   }
752   /* Loop Over B */
753   for ( i=0; i<b->mbs; i++ ) {
754     for ( j=bi[i]; j<bi[i+1]; j++ ) {
755       row = i+rstart;
756       col = garray[bj[j]];
757       key = row*Nbs + col + 1;
758       h1  = HASH(size,key,tmp);
759       for ( k=0; k<size; k++ ){
760         if (HT[(h1+k)%size] == 0.0) {
761           HT[(h1+k)%size] = key;
762           HD[(h1+k)%size] = b->a + j*bs2;
763           break;
764 #if defined(USE_PETSC_BOPT_g)
765         } else {
766           ct++;
767 #endif
768         }
769       }
770 #if defined(USE_PETSC_BOPT_g)
771       if (k> max) max = k;
772 #endif
773     }
774   }
775 
776   /* Print Summary */
777 #if defined(USE_PETSC_BOPT_g)
778   for ( i=0,j=0; i<size; i++)
779     if (HT[i]) {j++;}
780   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
781            (j== 0)? 0.0:((double)(ct+j))/j,max);
782 #endif
783   PetscFunctionReturn(0);
784 }
785 
786 #undef __FUNC__
787 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
788 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
789 {
790   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
791   int         ierr;
792   InsertMode  addv;
793 
794   PetscFunctionBegin;
795   if (baij->donotstash) {
796     PetscFunctionReturn(0);
797   }
798 
799   /* make sure all processors are either in INSERTMODE or ADDMODE */
800   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
801   if (addv == (ADD_VALUES|INSERT_VALUES)) {
802     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
803   }
804   mat->insertmode = addv; /* in case this processor had no cache */
805 
806   ierr =  StashScatterBegin_Private(&baij->stash,baij->rowners); CHKERRQ(ierr);
807   ierr =  StashScatterBegin_Private(&baij->bstash,baij->rowners); CHKERRQ(ierr);
808 
809   /* Free cache space */
810   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
811   PetscFunctionReturn(0);
812 }
813 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: drains the messages started in
   MatAssemblyBegin_MPIBAIJ() and inserts the received values locally, then
   assembles the local A (diagonal) and B (off-diagonal) blocks, handles
   disassembly coordination across processors, and (optionally) builds the
   hash table used by the fast MatSetValues paths.

   Collective on mat->comm.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point-stash messages; each message holds (row,col,val) triples
       sorted so that equal rows are consecutive */
    while (1) {
      ierr = StashScatterGetMesg_Private(&baij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = StashScatterGetMesg_Private(&baij->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed entry here is a full bs2 = bs*bs block, hence val+i*bs2 */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashScatterEnd_Private(&baij->bstash); CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff at least one processor disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  /* the scatter context for MatMult etc. is created on the first final assembly */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  /* debug build: report hash-table search statistics accumulated during MatSetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if MAT_USE_HASH_TABLE was requested, build the table now and switch to
     the hash-table-based MatSetValues implementations */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* workspace used by MatGetRow; sizes may change after reassembly so discard it */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
909 
910 #undef __FUNC__
911 #define __FUNC__ "MatView_MPIBAIJ_Binary"
912 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
913 {
914   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
915   int          ierr;
916 
917   PetscFunctionBegin;
918   if (baij->size == 1) {
919     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
920   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNC__
925 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
926 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
927 {
928   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
929   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
930   FILE         *fd;
931   ViewerType   vtype;
932 
933   PetscFunctionBegin;
934   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
935   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
936     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
937     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
938       MatInfo info;
939       MPI_Comm_rank(mat->comm,&rank);
940       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
941       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
942       PetscSequentialPhaseBegin(mat->comm,1);
943       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
944               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
945               baij->bs,(int)info.memory);
946       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
947       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
948       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
949       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
950       fflush(fd);
951       PetscSequentialPhaseEnd(mat->comm,1);
952       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
953       PetscFunctionReturn(0);
954     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
955       PetscPrintf(mat->comm,"  block size is %d\n",bs);
956       PetscFunctionReturn(0);
957     }
958   }
959 
960   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
961     Draw       draw;
962     PetscTruth isnull;
963     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
964     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
965   }
966 
967   if (size == 1) {
968     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
969   } else {
970     /* assemble the entire matrix onto first processor. */
971     Mat         A;
972     Mat_SeqBAIJ *Aloc;
973     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
974     int         mbs = baij->mbs;
975     Scalar      *a;
976 
977     if (!rank) {
978       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
979     } else {
980       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
981     }
982     PLogObjectParent(mat,A);
983 
984     /* copy over the A part */
985     Aloc = (Mat_SeqBAIJ*) baij->A->data;
986     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
987     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
988 
989     for ( i=0; i<mbs; i++ ) {
990       rvals[0] = bs*(baij->rstart + i);
991       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
992       for ( j=ai[i]; j<ai[i+1]; j++ ) {
993         col = (baij->cstart+aj[j])*bs;
994         for (k=0; k<bs; k++ ) {
995           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
996           col++; a += bs;
997         }
998       }
999     }
1000     /* copy over the B part */
1001     Aloc = (Mat_SeqBAIJ*) baij->B->data;
1002     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1003     for ( i=0; i<mbs; i++ ) {
1004       rvals[0] = bs*(baij->rstart + i);
1005       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1006       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1007         col = baij->garray[aj[j]]*bs;
1008         for (k=0; k<bs; k++ ) {
1009           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1010           col++; a += bs;
1011         }
1012       }
1013     }
1014     PetscFree(rvals);
1015     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1016     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1017     /*
1018        Everyone has to call to draw the matrix since the graphics waits are
1019        synchronized across all processors that share the Draw object
1020     */
1021     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
1022       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1023     }
1024     ierr = MatDestroy(A); CHKERRQ(ierr);
1025   }
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 
1030 
1031 #undef __FUNC__
1032 #define __FUNC__ "MatView_MPIBAIJ"
1033 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1034 {
1035   int         ierr;
1036   ViewerType  vtype;
1037 
1038   PetscFunctionBegin;
1039   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1040   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1041       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
1042     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
1043   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1044     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1045   } else {
1046     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Releases all storage owned by an MPIBAIJ matrix:
   mappings, maps, stashes, the local A and B blocks, the column map,
   scatter context, and assorted work arrays, then the header itself.
   A positive remaining reference count only decrements and returns.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  /* other references still exist; do not free anything yet */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
  ierr = StashDestroy_Private(&baij->bstash); CHKERRQ(ierr);
  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  /* the column map is either a Table or a plain array depending on build options */
#if defined (USE_CTABLE)
  if (baij->colmap) TableDelete(baij->colmap);
#else
  if (baij->colmap) PetscFree(baij->colmap);
#endif
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  /* finally release the data structure and the object header itself */
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1098 
1099 #undef __FUNC__
1100 #define __FUNC__ "MatMult_MPIBAIJ"
1101 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1104   int         ierr, nt;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1108   if (nt != a->n) {
1109     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1110   }
1111   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1112   if (nt != a->m) {
1113     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1114   }
1115   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1117   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1119   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNC__
1124 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1125 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1126 {
1127   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1128   int        ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1132   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1133   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1134   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNC__
1139 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1140 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1141 {
1142   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1143   int         ierr;
1144 
1145   PetscFunctionBegin;
1146   /* do nondiagonal part */
1147   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1148   /* send it on its way */
1149   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1150   /* do local part */
1151   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1152   /* receive remote parts: note this assumes the values are not actually */
1153   /* inserted in yy until the next line, which is true for my implementation*/
1154   /* but is not perhaps always true. */
1155   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNC__
1160 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1161 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1162 {
1163   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1164   int         ierr;
1165 
1166   PetscFunctionBegin;
1167   /* do nondiagonal part */
1168   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1169   /* send it on its way */
1170   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1171   /* do local part */
1172   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1173   /* receive remote parts: note this assumes the values are not actually */
1174   /* inserted in yy until the next line, which is true for my implementation*/
1175   /* but is not perhaps always true. */
1176   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 /*
1181   This only works correctly for square matrices where the subblock A->A is the
1182    diagonal block
1183 */
1184 #undef __FUNC__
1185 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1186 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1187 {
1188   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1189   int         ierr;
1190 
1191   PetscFunctionBegin;
1192   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1193   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNC__
1198 #define __FUNC__ "MatScale_MPIBAIJ"
1199 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1200 {
1201   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1202   int         ierr;
1203 
1204   PetscFunctionBegin;
1205   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1206   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNC__
1211 #define __FUNC__ "MatGetSize_MPIBAIJ"
1212 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1213 {
1214   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1215 
1216   PetscFunctionBegin;
1217   if (m) *m = mat->M;
1218   if (n) *n = mat->N;
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 #undef __FUNC__
1223 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1224 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1225 {
1226   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1227 
1228   PetscFunctionBegin;
1229   *m = mat->m; *n = mat->n;
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNC__
1234 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1235 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1236 {
1237   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1238 
1239   PetscFunctionBegin;
1240   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1241   PetscFunctionReturn(0);
1242 }
1243 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally owned (global point) row of the
   matrix, merging the row pieces held in the diagonal (A) and off-diagonal
   (B) local blocks into a single column-sorted list.  Only one row may be
   active at a time; MatRestoreRow_MPIBAIJ() ends the access.

   Output arrays point into per-matrix workspace (rowvalues/rowindices) that
   is reused across calls and freed at the next assembly.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  /* lazily build the merge workspace on the first call that needs output arrays */
  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* total blocks in block-row i across both A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: Scalars first, then the index array right behind them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* pass NULL for whichever outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first ... */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all A entries, then the remaining B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined above while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values not requested: determine imark here instead */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A columns are local to the diagonal block: offset by cstart*bs */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* release the row access obtained from the two sequential blocks */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1325 
1326 #undef __FUNC__
1327 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1328 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1329 {
1330   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1331 
1332   PetscFunctionBegin;
1333   if (baij->getrowactive == PETSC_FALSE) {
1334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1335   }
1336   baij->getrowactive = PETSC_FALSE;
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNC__
1341 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1342 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1343 {
1344   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1345 
1346   PetscFunctionBegin;
1347   *bs = baij->bs;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNC__
1352 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1353 int MatZeroEntries_MPIBAIJ(Mat A)
1354 {
1355   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1356   int         ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1360   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNC__
1365 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1366 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1367 {
1368   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1369   Mat         A = a->A, B = a->B;
1370   int         ierr;
1371   double      isend[5], irecv[5];
1372 
1373   PetscFunctionBegin;
1374   info->block_size     = (double)a->bs;
1375   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1376   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1377   isend[3] = info->memory;  isend[4] = info->mallocs;
1378   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1379   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1380   isend[3] += info->memory;  isend[4] += info->mallocs;
1381   if (flag == MAT_LOCAL) {
1382     info->nz_used      = isend[0];
1383     info->nz_allocated = isend[1];
1384     info->nz_unneeded  = isend[2];
1385     info->memory       = isend[3];
1386     info->mallocs      = isend[4];
1387   } else if (flag == MAT_GLOBAL_MAX) {
1388     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1389     info->nz_used      = irecv[0];
1390     info->nz_allocated = irecv[1];
1391     info->nz_unneeded  = irecv[2];
1392     info->memory       = irecv[3];
1393     info->mallocs      = irecv[4];
1394   } else if (flag == MAT_GLOBAL_SUM) {
1395     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1396     info->nz_used      = irecv[0];
1397     info->nz_allocated = irecv[1];
1398     info->nz_unneeded  = irecv[2];
1399     info->memory       = irecv[3];
1400     info->mallocs      = irecv[4];
1401   } else {
1402     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1403   }
1404   info->rows_global       = (double)a->M;
1405   info->columns_global    = (double)a->N;
1406   info->rows_local        = (double)a->m;
1407   info->columns_local     = (double)a->N;
1408   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1409   info->fill_ratio_needed = 0;
1410   info->factor_mallocs    = 0;
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNC__
1415 #define __FUNC__ "MatSetOption_MPIBAIJ"
1416 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1417 {
1418   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1419   int         ierr;
1420 
1421   PetscFunctionBegin;
1422   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1423       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1424       op == MAT_COLUMNS_UNSORTED ||
1425       op == MAT_COLUMNS_SORTED ||
1426       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1427       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1428         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1429         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1430   } else if (op == MAT_ROW_ORIENTED) {
1431         a->roworiented = 1;
1432         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1433         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1434   } else if (op == MAT_ROWS_SORTED ||
1435              op == MAT_ROWS_UNSORTED ||
1436              op == MAT_SYMMETRIC ||
1437              op == MAT_STRUCTURALLY_SYMMETRIC ||
1438              op == MAT_YES_NEW_DIAGONALS ||
1439              op == MAT_USE_HASH_TABLE) {
1440     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1441   } else if (op == MAT_COLUMN_ORIENTED) {
1442     a->roworiented = 0;
1443     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1444     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1445   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1446     a->donotstash = 1;
1447   } else if (op == MAT_NO_NEW_DIAGONALS) {
1448     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1449   } else if (op == MAT_USE_HASH_TABLE) {
1450     a->ht_flag = 1;
1451   } else {
1452     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNC__
1458 #define __FUNC__ "MatTranspose_MPIBAIJ("
1459 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1460 {
1461   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1462   Mat_SeqBAIJ *Aloc;
1463   Mat        B;
1464   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1465   int        bs=baij->bs,mbs=baij->mbs;
1466   Scalar     *a;
1467 
1468   PetscFunctionBegin;
1469   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1470   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1471   CHKERRQ(ierr);
1472 
1473   /* copy over the A part */
1474   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1475   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1476   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1477 
1478   for ( i=0; i<mbs; i++ ) {
1479     rvals[0] = bs*(baij->rstart + i);
1480     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1481     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1482       col = (baij->cstart+aj[j])*bs;
1483       for (k=0; k<bs; k++ ) {
1484         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1485         col++; a += bs;
1486       }
1487     }
1488   }
1489   /* copy over the B part */
1490   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1491   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1492   for ( i=0; i<mbs; i++ ) {
1493     rvals[0] = bs*(baij->rstart + i);
1494     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1495     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1496       col = baij->garray[aj[j]]*bs;
1497       for (k=0; k<bs; k++ ) {
1498         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1499         col++; a += bs;
1500       }
1501     }
1502   }
1503   PetscFree(rvals);
1504   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1505   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1506 
1507   if (matout != PETSC_NULL) {
1508     *matout = B;
1509   } else {
1510     PetscOps *Abops;
1511     MatOps   Aops;
1512 
1513     /* This isn't really an in-place transpose .... but free data structures from baij */
1514     PetscFree(baij->rowners);
1515     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1516     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1517 #if defined (USE_CTABLE)
1518     if (baij->colmap) TableDelete(baij->colmap);
1519 #else
1520     if (baij->colmap) PetscFree(baij->colmap);
1521 #endif
1522     if (baij->garray) PetscFree(baij->garray);
1523     if (baij->lvec) VecDestroy(baij->lvec);
1524     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1525     PetscFree(baij);
1526 
1527     /*
1528        This is horrible, horrible code. We need to keep the
1529       A pointers for the bops and ops but copy everything
1530       else from C.
1531     */
1532     Abops = A->bops;
1533     Aops  = A->ops;
1534     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1535     A->bops = Abops;
1536     A->ops  = Aops;
1537 
1538     PetscHeaderDestroy(B);
1539   }
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNC__
1544 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1545 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1546 {
1547   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1548   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1549   int ierr,s1,s2,s3;
1550 
1551   PetscFunctionBegin;
1552   if (ll)  {
1553     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1554     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1555     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1556     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1557     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1558   }
1559   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 #undef __FUNC__
1564 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1565 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1566 {
1567   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1568   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1569   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1570   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1571   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1572   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1573   MPI_Comm       comm = A->comm;
1574   MPI_Request    *send_waits,*recv_waits;
1575   MPI_Status     recv_status,*send_status;
1576   IS             istmp;
1577 
1578   PetscFunctionBegin;
1579   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1580   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1581 
1582   /*  first count number of contributors to each processor */
1583   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1584   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1585   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1586   for ( i=0; i<N; i++ ) {
1587     idx = rows[i];
1588     found = 0;
1589     for ( j=0; j<size; j++ ) {
1590       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1591         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1592       }
1593     }
1594     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1595   }
1596   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1597 
1598   /* inform other processors of number of messages and max length*/
1599   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1600   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1601   nrecvs = work[rank];
1602   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1603   nmax   = work[rank];
1604   PetscFree(work);
1605 
1606   /* post receives:   */
1607   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1608   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1609   for ( i=0; i<nrecvs; i++ ) {
1610     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1611   }
1612 
1613   /* do sends:
1614      1) starts[i] gives the starting index in svalues for stuff going to
1615      the ith processor
1616   */
1617   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1618   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1619   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1620   starts[0] = 0;
1621   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1622   for ( i=0; i<N; i++ ) {
1623     svalues[starts[owner[i]]++] = rows[i];
1624   }
1625   ISRestoreIndices(is,&rows);
1626 
1627   starts[0] = 0;
1628   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1629   count = 0;
1630   for ( i=0; i<size; i++ ) {
1631     if (procs[i]) {
1632       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1633     }
1634   }
1635   PetscFree(starts);
1636 
1637   base = owners[rank]*bs;
1638 
1639   /*  wait on receives */
1640   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1641   source = lens + nrecvs;
1642   count  = nrecvs; slen = 0;
1643   while (count) {
1644     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1645     /* unpack receives into our local space */
1646     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1647     source[imdex]  = recv_status.MPI_SOURCE;
1648     lens[imdex]  = n;
1649     slen += n;
1650     count--;
1651   }
1652   PetscFree(recv_waits);
1653 
1654   /* move the data into the send scatter */
1655   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1656   count = 0;
1657   for ( i=0; i<nrecvs; i++ ) {
1658     values = rvalues + i*nmax;
1659     for ( j=0; j<lens[i]; j++ ) {
1660       lrows[count++] = values[j] - base;
1661     }
1662   }
1663   PetscFree(rvalues); PetscFree(lens);
1664   PetscFree(owner); PetscFree(nprocs);
1665 
1666   /* actually zap the local rows */
1667   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1668   PLogObjectParent(A,istmp);
1669 
1670   /*
1671         Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
1673      code so that MatSetValues() is not called for each diagonal allocating
1674      new memory, thus calling lots of mallocs and slowing things down.
1675 
1676        Contributed by: Mathew Knepley
1677   */
1678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1679   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1680   if (diag && (l->A->M == l->A->N)) {
1681     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1682   } else if (diag) {
1683     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1684     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1685       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1686 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1687     }
1688     for ( i = 0; i < slen; i++ ) {
1689       row  = lrows[i] + rstart_bs;
1690       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1691     }
1692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   } else {
1695     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1696   }
1697 
1698   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1699   PetscFree(lrows);
1700 
1701   /* wait on sends */
1702   if (nsends) {
1703     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1704     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1705     PetscFree(send_status);
1706   }
1707   PetscFree(send_waits); PetscFree(svalues);
1708 
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNC__
1713 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1714 int MatPrintHelp_MPIBAIJ(Mat A)
1715 {
1716   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1717   MPI_Comm    comm = A->comm;
1718   static int  called = 0;
1719   int         ierr;
1720 
1721   PetscFunctionBegin;
1722   if (!a->rank) {
1723     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1724   }
1725   if (called) {PetscFunctionReturn(0);} else called = 1;
1726   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1727   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNC__
1732 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1733 int MatSetUnfactored_MPIBAIJ(Mat A)
1734 {
1735   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1736   int         ierr;
1737 
1738   PetscFunctionBegin;
1739   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 
static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

/* -------------------------------------------------------------------*/
/*
   Function (dispatch) table for the MATMPIBAIJ matrix type.  The
   entries are POSITIONAL: each slot corresponds to a fixed member of
   struct _MatOps, and a 0 marks an operation this format does not
   support.  Do not reorder entries.  The table is copied wholesale
   into B->ops by MatCreateMPIBAIJ()/MatDuplicate_MPIBAIJ(), which then
   overwrite the destroy and view slots individually.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  0,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1813 
1814 
1815 EXTERN_C_BEGIN
1816 #undef __FUNC__
1817 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1818 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1819 {
1820   PetscFunctionBegin;
1821   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1822   *iscopy = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 EXTERN_C_END
1826 
1827 #undef __FUNC__
1828 #define __FUNC__ "MatCreateMPIBAIJ"
1829 /*@C
1830    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1831    (block compressed row).  For good matrix assembly performance
1832    the user should preallocate the matrix storage by setting the parameters
1833    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1834    performance can be increased by more than a factor of 50.
1835 
1836    Collective on MPI_Comm
1837 
1838    Input Parameters:
1839 +  comm - MPI communicator
.  bs   - size of block
1841 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1842            This value should be the same as the local size used in creating the
1843            y vector for the matrix-vector product y = Ax.
1844 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1845            This value should be the same as the local size used in creating the
1846            x vector for the matrix-vector product y = Ax.
1847 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1848 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1849 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1850            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1854 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1855            submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.
1859 
1860    Output Parameter:
1861 .  A - the matrix
1862 
1863    Options Database Keys:
1864 .   -mat_no_unroll - uses code that does not unroll the loops in the
1865                      block calculations (much slower)
1866 .   -mat_block_size - size of the blocks to use
1867 .   -mat_mpi - use the parallel matrix data structures even on one processor
1868                (defaults to using SeqBAIJ format on one processor)
1869 
1870    Notes:
1871    The user MUST specify either the local or global matrix dimensions
1872    (possibly both).
1873 
1874    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1876 
1877    Storage Information:
1878    For a square global matrix we define each processor's diagonal portion
1879    to be its local rows and the corresponding columns (a square submatrix);
1880    each processor's off-diagonal portion encompasses the remainder of the
1881    local matrix (a rectangular submatrix).
1882 
1883    The user can specify preallocated storage for the diagonal part of
1884    the local submatrix with either d_nz or d_nnz (not both).  Set
1885    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1886    memory allocation.  Likewise, specify preallocated storage for the
1887    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1888 
1889    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1890    the figure below we depict these three local rows and all columns (0-11).
1891 
1892 .vb
1893            0 1 2 3 4 5 6 7 8 9 10 11
1894           -------------------
1895    row 3  |  o o o d d d o o o o o o
1896    row 4  |  o o o d d d o o o o o o
1897    row 5  |  o o o d d d o o o o o o
1898           -------------------
1899 .ve
1900 
1901    Thus, any entries in the d locations are stored in the d (diagonal)
1902    submatrix, and any entries in the o locations are stored in the
1903    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1904    stored simply in the MATSEQBAIJ format for compressed row storage.
1905 
1906    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1907    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1908    In general, for PDE problems in which most nonzeros are near the diagonal,
1909    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1910    or you will get TERRIBLE performance; see the users' manual chapter on
1911    matrices.
1912 
1913    Level: intermediate
1914 
1915 .keywords: matrix, block, aij, compressed row, sparse, parallel
1916 
1917 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1918 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* On one process fall back to the sequential BAIJ format, unless the
     user explicitly forces the parallel data structures with
     -mat_mpibaij or -mat_mpi */
  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    /* NOTE(review): o_nz/o_nnz are dropped here; sequentially the whole
       matrix is the "diagonal" block, but preallocation may then be short
       by o_nz per row -- confirm this is the intended behavior */
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* create the Mat header and zero-initialize the MPIBAIJ-specific data */
  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* validate the local/global size combinations */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* if both local and global sizes were given the local sizes win;
     the global sizes are recomputed by summation below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* compute missing global sizes as the sum of the local sizes (collective) */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* compute missing local sizes by splitting the global block count evenly,
     giving the first (Mbs % size) processes one extra block row */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* record both point sizes (m,n,M,N) and block sizes (mbs,...,Nbs) */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* rowners/cowners share one allocation; entry [rank]..[rank+1] bounds
     this process's block rows/cols after the prefix sums below */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the local diagonal (m x n) and off-diagonal (m x N) blocks;
     the off-diagonal block is stored with full global column width */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashCreate_Private(comm,1,bs,&b->stash); CHKERRQ(ierr);
  ierr = StashCreate_Private(comm,bs,bs,&b->bstash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* optionally enable hash-table based assembly; factors <= 1.0 are
     replaced by the default 1.39 */
  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register MatGetDiagonalBlock so generic code can retrieve b->A */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2084 
2085 #undef __FUNC__
2086 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2087 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2088 {
2089   Mat         mat;
2090   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2091   int         ierr, len=0, flg;
2092 
2093   PetscFunctionBegin;
2094   *newmat       = 0;
2095   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2096   PLogObjectCreate(mat);
2097   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2098   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2099   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2100   mat->ops->view       = MatView_MPIBAIJ;
2101   mat->factor          = matin->factor;
2102   mat->assembled       = PETSC_TRUE;
2103   mat->insertmode      = NOT_SET_VALUES;
2104 
2105   a->m = mat->m   = oldmat->m;
2106   a->n = mat->n   = oldmat->n;
2107   a->M = mat->M   = oldmat->M;
2108   a->N = mat->N   = oldmat->N;
2109 
2110   a->bs  = oldmat->bs;
2111   a->bs2 = oldmat->bs2;
2112   a->mbs = oldmat->mbs;
2113   a->nbs = oldmat->nbs;
2114   a->Mbs = oldmat->Mbs;
2115   a->Nbs = oldmat->Nbs;
2116 
2117   a->rstart       = oldmat->rstart;
2118   a->rend         = oldmat->rend;
2119   a->cstart       = oldmat->cstart;
2120   a->cend         = oldmat->cend;
2121   a->size         = oldmat->size;
2122   a->rank         = oldmat->rank;
2123   a->donotstash   = oldmat->donotstash;
2124   a->roworiented  = oldmat->roworiented;
2125   a->rowindices   = 0;
2126   a->rowvalues    = 0;
2127   a->getrowactive = PETSC_FALSE;
2128   a->barray       = 0;
2129   a->rstart_bs    = oldmat->rstart_bs;
2130   a->rend_bs      = oldmat->rend_bs;
2131   a->cstart_bs    = oldmat->cstart_bs;
2132   a->cend_bs      = oldmat->cend_bs;
2133 
2134   /* hash table stuff */
2135   a->ht           = 0;
2136   a->hd           = 0;
2137   a->ht_size      = 0;
2138   a->ht_flag      = oldmat->ht_flag;
2139   a->ht_fact      = oldmat->ht_fact;
2140   a->ht_total_ct  = 0;
2141   a->ht_insert_ct = 0;
2142 
2143 
2144   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2145   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2146   a->cowners = a->rowners + a->size + 2;
2147   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2148   ierr = StashCreate_Private(matin->comm,1,oldmat->bs,&a->stash); CHKERRQ(ierr);
2149   ierr = StashCreate_Private(matin->comm,oldmat->bs,oldmat->bs,&a->bstash); CHKERRQ(ierr);
2150   if (oldmat->colmap) {
2151 #if defined (USE_CTABLE)
2152   ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
2153 #else
2154     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2155     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2156     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2157 #endif
2158   } else a->colmap = 0;
2159   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2160     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2161     PLogObjectMemory(mat,len*sizeof(int));
2162     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2163   } else a->garray = 0;
2164 
2165   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2166   PLogObjectParent(mat,a->lvec);
2167   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2168 
2169   PLogObjectParent(mat,a->Mvctx);
2170   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2171   PLogObjectParent(mat,a->A);
2172   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2173   PLogObjectParent(mat,a->B);
2174   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2175   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2176   if (flg) {
2177     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2178   }
2179   *newmat = mat;
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 #include "sys.h"
2184 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a matrix stored in PETSc binary format into a
   parallel MATMPIBAIJ matrix.

   Process 0 reads the header, row lengths, column indices and values
   from the binary file and ships each process its slice (row lengths by
   MPI_Scatterv, indices/values by point-to-point sends).  If the global
   row count is not divisible by the block size (option
   -matload_block_size), the matrix is padded with extra rows/columns
   carrying a unit diagonal.

   NOTE(review): only square matrices are accepted (M == N below), so
   comparing block column indices against the block ROW ownership range
   [rstart,rend) to split diagonal/off-diagonal entries is valid.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* process 0 reads and validates the 4-int binary header */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  /* extra_rows = bs - (M mod bs), i.e. the padding needed to complete
     the last block row (0 if M is already a multiple of bs) */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  /* rowners[] holds block-row ranges per process; browners[] the same
     ranges scaled to point rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* padding rows are given length 1 (their unit diagonal entry) */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* uniprocessor: the padding entries are not on disk, so read fewer
       and append column indices for the unit-diagonal padding rows */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* mask[] marks block columns already seen in the current block row;
     masked1/masked2 record which marks to clear afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    /* padding rows get a unit diagonal */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    /* NOTE(review): procsnz[i] here relies on the loop above leaving
       i == size-1; the subsequent for-loop then reuses i */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2412 
2413 
2414 
2415 #undef __FUNC__
2416 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2417 /*@
2418    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2419 
2420    Input Parameters:
2421 .  mat  - the matrix
2422 .  fact - factor
2423 
2424    Collective on Mat
2425 
2426   Notes:
2427    This can also be set by the command line option: -mat_use_hash_table fact
2428 
2429 .keywords: matrix, hashtable, factor, HT
2430 
2431 .seealso: MatSetOption()
2432 @*/
2433 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2434 {
2435   Mat_MPIBAIJ *baij;
2436 
2437   PetscFunctionBegin;
2438   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2439   if (mat->type != MATMPIBAIJ) {
2440       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2441   }
2442   baij = (Mat_MPIBAIJ*) mat->data;
2443   baij->ht_fact = fact;
2444   PetscFunctionReturn(0);
2445 }
2446