xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7ef1d9bd187294aa042bd52ec02f5579326131eb)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.158 1999/03/11 22:30:18 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 /*
20      Local utility routine that creates a mapping from the global column
21    number to the local number in the off-diagonal part of the local
22    storage of the matrix.  This is done in a non scalable way since the
23    length of colmap equals the global matrix length.
24 */
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
/*
   CreateColmap_MPIBAIJ_Private - Builds baij->colmap, mapping a global
   block-column number to 1 + (first local scalar column of that block) in
   the off-diagonal part (baij->B).  Values are stored shifted by +1 so that
   0 (or a failed table lookup) can serve as the "column not present" sentinel.
*/
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* hash-table variant: avoids allocating an array of global length */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: one int per global block column (not scalable) */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
50 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - Inserts or adds one
   scalar value into the "diagonal" sequential block matrix A.  row/col are
   local scalar indices; the macro locates the block (brow,bcol), searches
   the block row for the block column (bisection narrowed to a short linear
   scan), and either updates the (ridx,cidx) entry of an existing block in
   place or inserts a new block, growing the storage by CHUNKSIZE blocks
   when the row is full.  Relies on the working variables declared in
   MatSetValues_MPIBAIJ (rp,ap,bap,nrow,rmax,low,high,t,_i,ii,N, and the
   cached pointers a,ai,aj,aa,aimax,ailen).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* bisect down to a window of <= 3, then scan linearly */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block already stored: update its (ridx,cidx) entry in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      /* nonew==1: silently ignore new nonzeros; nonew==-1: error on them */ \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space (values, column indices, and row \
           pointers packed into one allocation) */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, opening a CHUNKSIZE gap \
           immediately after row brow */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        /* NOTE(review): a->nz is bumped only on this realloc path, not on \
           every insertion of a new block below -- verify against the \
           users of a->nz before changing */ \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
127 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - Twin of
   MatSetValues_SeqBAIJ_A_Private above, but operating on the off-diagonal
   sequential block matrix B (cached pointers b,bi,bj,ba,bimax,bilen).
   Kept as a separate macro because the two differ only in which matrix's
   data they touch.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* bisect down to a window of <= 3, then scan linearly */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block already stored: update its (ridx,cidx) entry in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      /* nonew==1: silently ignore new nonzeros; nonew==-1: error on them */ \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space (values, column indices, and row \
           pointers packed into one allocation) */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, opening a CHUNKSIZE gap \
           immediately after row brow */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        /* NOTE(review): b->nz is bumped only on this realloc path, not on \
           every insertion of a new block below -- verify against the \
           users of b->nz before changing */ \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
202 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Scalar-indexed MatSetValues() for the MPI block
   AIJ format.

   Locally owned rows are inserted directly into the "diagonal" block A
   (columns in [cstart_bs,cend_bs)) or the off-diagonal block B using the
   macros above, which are manually inlined versions of
   MatSetValues_SeqBAIJ() (see the commented-out calls below).  Rows owned
   by other processors are placed in the stash for communication during
   MatAssemblyBegin/End(), unless baij->donotstash is set.

   im[]/in[] are global scalar (not block) indices; negative indices are
   skipped.  v is row or column oriented per baij->roworiented; addv is
   INSERT_VALUES or ADD_VALUES.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row: switch to the local scalar row number */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* off-diagonal entry: map the global column to the local column
             numbering of B through colmap (values stored shifted by 1) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not yet stored in B: revert to the
                 unassembled (global numbering) state and insert there */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-processor row: stash the values for assembly-time exchange */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
289 
290 #undef __FUNC__
291 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
292 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
293 {
294   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
295   Scalar      *value,*barray=baij->barray;
296   int         ierr,i,j,ii,jj,row,col;
297   int         roworiented = baij->roworiented,rstart=baij->rstart ;
298   int         rend=baij->rend,cstart=baij->cstart,stepval;
299   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
300 
301   if(!barray) {
302     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
303   }
304 
305   if (roworiented) {
306     stepval = (n-1)*bs;
307   } else {
308     stepval = (m-1)*bs;
309   }
310   for ( i=0; i<m; i++ ) {
311     if (im[i] < 0) continue;
312 #if defined(USE_PETSC_BOPT_g)
313     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
314 #endif
315     if (im[i] >= rstart && im[i] < rend) {
316       row = im[i] - rstart;
317       for ( j=0; j<n; j++ ) {
318         /* If NumCol = 1 then a copy is not required */
319         if ((roworiented) && (n == 1)) {
320           barray = v + i*bs2;
321         } else if((!roworiented) && (m == 1)) {
322           barray = v + j*bs2;
323         } else { /* Here a copy is required */
324           if (roworiented) {
325             value = v + i*(stepval+bs)*bs + j*bs;
326           } else {
327             value = v + j*(stepval+bs)*bs + i*bs;
328           }
329           for ( ii=0; ii<bs; ii++,value+=stepval ) {
330             for (jj=0; jj<bs; jj++ ) {
331               *barray++  = *value++;
332             }
333           }
334           barray -=bs2;
335         }
336 
337         if (in[j] >= cstart && in[j] < cend){
338           col  = in[j] - cstart;
339           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
340         }
341         else if (in[j] < 0) continue;
342 #if defined(USE_PETSC_BOPT_g)
343         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
344 #endif
345         else {
346           if (mat->was_assembled) {
347             if (!baij->colmap) {
348               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
349             }
350 
351 #if defined(USE_PETSC_BOPT_g)
352 #if defined (USE_CTABLE)
353             { int data;
354             ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
355             if((data - 1) % bs)
356 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
357             }
358 #else
359             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
360 #endif
361 #endif
362 #if defined (USE_CTABLE)
363 	    ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
364             col  = (col - 1)/bs;
365 #else
366             col = (baij->colmap[in[j]] - 1)/bs;
367 #endif
368             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
369               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
370               col =  in[j];
371             }
372           }
373           else col = in[j];
374           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
375         }
376       }
377     } else {
378       if (!baij->donotstash) {
379         /*        ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v+i*bs2*n,
380                                           roworiented,stepval,addv);CHKERRQ(ierr); */
381         ierr = StashValuesBlocked_Private(&baij->bstash,im[i],n,in,v,m,n,
382                                           roworiented,i);CHKERRQ(ierr);
383       }
384     }
385   }
386   PetscFunctionReturn(0);
387 }
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* multiplicative hashing; HASH_KEY is the golden-ratio fraction.  tmp must
   be a double lvalue supplied by the caller */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Scalar-indexed MatSetValues() that locates each
   destination bs*bs block through the hash table built by
   MatCreateHashTable_MPIBAIJ_Private() (linear probing, wrapping from the
   end of the table back to its start).  Only blocks already present in the
   matrix can be addressed; a lookup miss is an error.  Off-processor rows
   are stashed exactly as in MatSetValues_MPIBAIJ().
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the global block
           (row,col) pair, shifted by 1 so key 0 means "empty slot" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build additionally counts probes for the assembly report */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-processor row: stash for assembly-time exchange */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
471 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Block-indexed MatSetValues() that finds
   each destination bs*bs block through the hash table built by
   MatCreateHashTable_MPIBAIJ_Private() (linear probing with wrap-around).
   Only blocks already present in the matrix can be addressed; a lookup
   miss is an error.  Off-processor block rows are stashed entry by entry
   in scalar form.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* scalars to skip from the end of one block's row of v to the start of
     the next, for the chosen orientation */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    /* NOTE(review): v_t advances by bs per column in the roworiented branch
       below, i.e. it assumes block (i,j) starts at v + i*bs2 + j*bs rather
       than the commented-out formula v + i*(stepval+bs)*bs + j*bs --
       confirm the layout expected from the callers of this HT path */
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table: key encodes the global block
           (row,col) pair, shifted by 1 so key 0 means "empty slot" */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build additionally counts probes for the assembly report */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* copy/add the bs*bs block of v into the stored block at HD[idx];
           the stored block is column oriented (stride bs) */
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-processor block row: stash scalar entries one at a time,
         expanding the block indices to scalar row/column numbers */
      if (!baij->donotstash) {
        if (roworiented ) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
601 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves the m x n array of entries named by
   global scalar indices idxm[]/idxn[] into v (row-major, v[i*n+j]).
   Diagonal-block columns are read from A; other columns are located
   through colmap/garray and read from B, with entries not stored locally
   returned as 0.0.  Only locally owned rows are supported.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 or a garray mismatch means the block column is not
             stored in B, so the entry is an (unstored) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
645 
646 #undef __FUNC__
647 #define __FUNC__ "MatNorm_MPIBAIJ"
648 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
649 {
650   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
651   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
652   int        ierr, i,bs2=baij->bs2;
653   double     sum = 0.0;
654   Scalar     *v;
655 
656   PetscFunctionBegin;
657   if (baij->size == 1) {
658     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
659   } else {
660     if (type == NORM_FROBENIUS) {
661       v = amat->a;
662       for (i=0; i<amat->nz*bs2; i++ ) {
663 #if defined(USE_PETSC_COMPLEX)
664         sum += PetscReal(PetscConj(*v)*(*v)); v++;
665 #else
666         sum += (*v)*(*v); v++;
667 #endif
668       }
669       v = bmat->a;
670       for (i=0; i<bmat->nz*bs2; i++ ) {
671 #if defined(USE_PETSC_COMPLEX)
672         sum += PetscReal(PetscConj(*v)*(*v)); v++;
673 #else
674         sum += (*v)*(*v); v++;
675 #endif
676       }
677       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
678       *norm = sqrt(*norm);
679     } else {
680       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
681     }
682   }
683   PetscFunctionReturn(0);
684 }
685 
686 
687 /*
688   Creates the hash table, and sets the table
689   This table is created only once.
690   If new entries need to be added to the matrix
691   then the hash table has to be destroyed and
692   recreated.
693 */
694 #undef __FUNC__
695 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
696 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
697 {
698   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
699   Mat         A = baij->A, B=baij->B;
700   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
701   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
702   int         size,bs2=baij->bs2,rstart=baij->rstart;
703   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
704   int         *HT,key;
705   Scalar      **HD;
706   double      tmp;
707 #if defined(USE_PETSC_BOPT_g)
708   int         ct=0,max=0;
709 #endif
710 
711   PetscFunctionBegin;
712   baij->ht_size=(int)(factor*nz);
713   size = baij->ht_size;
714 
715   if (baij->ht) {
716     PetscFunctionReturn(0);
717   }
718 
719   /* Allocate Memory for Hash Table */
720   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
721   baij->ht = (int*)(baij->hd + size);
722   HD = baij->hd;
723   HT = baij->ht;
724 
725 
726   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
727 
728 
729   /* Loop Over A */
730   for ( i=0; i<a->mbs; i++ ) {
731     for ( j=ai[i]; j<ai[i+1]; j++ ) {
732       row = i+rstart;
733       col = aj[j]+cstart;
734 
735       key = row*Nbs + col + 1;
736       h1  = HASH(size,key,tmp);
737       for ( k=0; k<size; k++ ){
738         if (HT[(h1+k)%size] == 0.0) {
739           HT[(h1+k)%size] = key;
740           HD[(h1+k)%size] = a->a + j*bs2;
741           break;
742 #if defined(USE_PETSC_BOPT_g)
743         } else {
744           ct++;
745 #endif
746         }
747       }
748 #if defined(USE_PETSC_BOPT_g)
749       if (k> max) max = k;
750 #endif
751     }
752   }
753   /* Loop Over B */
754   for ( i=0; i<b->mbs; i++ ) {
755     for ( j=bi[i]; j<bi[i+1]; j++ ) {
756       row = i+rstart;
757       col = garray[bj[j]];
758       key = row*Nbs + col + 1;
759       h1  = HASH(size,key,tmp);
760       for ( k=0; k<size; k++ ){
761         if (HT[(h1+k)%size] == 0.0) {
762           HT[(h1+k)%size] = key;
763           HD[(h1+k)%size] = b->a + j*bs2;
764           break;
765 #if defined(USE_PETSC_BOPT_g)
766         } else {
767           ct++;
768 #endif
769         }
770       }
771 #if defined(USE_PETSC_BOPT_g)
772       if (k> max) max = k;
773 #endif
774     }
775   }
776 
777   /* Print Summary */
778 #if defined(USE_PETSC_BOPT_g)
779   for ( i=0,j=0; i<size; i++)
780     if (HT[i]) {j++;}
781   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
782            (j== 0)? 0.0:((double)(ct+j))/j,max);
783 #endif
784   PetscFunctionReturn(0);
785 }
786 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Begins assembly of the parallel matrix:
   checks that all processors agree on INSERT_VALUES vs ADD_VALUES and
   starts scattering the stashed off-processor scalar and block entries to
   their owning processors.  MatAssemblyEnd_MPIBAIJ() completes the
   exchange.
*/
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    /* nothing was stashed, so no communication is required */
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* NOTE(review): both the scalar stash and the block stash are scattered
     with baij->rowners; confirm this is the intended ownership array for
     the block stash, whose rows are block indices */
  ierr =  StashScatterBegin_Private(&baij->stash,baij->rowners); CHKERRQ(ierr);
  ierr =  StashScatterBegin_Private(&baij->bstash,baij->rowners); CHKERRQ(ierr);

  /* Free cache space */
  PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  PetscFunctionReturn(0);
}
814 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Receives the stashed off-processor values started
   in MatAssemblyBegin_MPIBAIJ(), inserts them locally, assembles the diagonal
   (A) and off-diagonal (B) sequential blocks, and handles disassembly and
   hash-table setup.  Collective on mat->comm.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each message holds (row,col,val) triples */
    while (1) {
      ierr = StashScatterGetMesg_Private(&baij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = StashScatterGetMesg_Private(&baij->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* note: block values are bs2 scalars each, hence val+i*bs2 */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = StashScatterEnd_Private(&baij->bstash); CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags: result is 0 iff some processor
       disassembled (had was_assembled == 0) */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter context for MatMult etc. */
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    /* report and reset hash-table search statistics (debug builds only) */
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* user requested hash-table-based insertion: build the table now and
       switch the MatSetValues entry points to the _HT variants */
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow() workspace is invalid after a new assembly; free it */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
910 
911 #undef __FUNC__
912 #define __FUNC__ "MatView_MPIBAIJ_Binary"
913 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
914 {
915   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
916   int          ierr;
917 
918   PetscFunctionBegin;
919   if (baij->size == 1) {
920     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
921   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNC__
926 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
927 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
928 {
929   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
930   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
931   FILE         *fd;
932   ViewerType   vtype;
933 
934   PetscFunctionBegin;
935   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
936   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
937     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
938     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
939       MatInfo info;
940       MPI_Comm_rank(mat->comm,&rank);
941       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
942       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
943       PetscSequentialPhaseBegin(mat->comm,1);
944       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
945               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
946               baij->bs,(int)info.memory);
947       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
948       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
949       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
950       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
951       fflush(fd);
952       PetscSequentialPhaseEnd(mat->comm,1);
953       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
954       PetscFunctionReturn(0);
955     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
956       PetscPrintf(mat->comm,"  block size is %d\n",bs);
957       PetscFunctionReturn(0);
958     }
959   }
960 
961   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
962     Draw       draw;
963     PetscTruth isnull;
964     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
965     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
966   }
967 
968   if (size == 1) {
969     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
970   } else {
971     /* assemble the entire matrix onto first processor. */
972     Mat         A;
973     Mat_SeqBAIJ *Aloc;
974     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
975     int         mbs = baij->mbs;
976     Scalar      *a;
977 
978     if (!rank) {
979       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
980     } else {
981       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
982     }
983     PLogObjectParent(mat,A);
984 
985     /* copy over the A part */
986     Aloc = (Mat_SeqBAIJ*) baij->A->data;
987     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
988     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
989 
990     for ( i=0; i<mbs; i++ ) {
991       rvals[0] = bs*(baij->rstart + i);
992       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
993       for ( j=ai[i]; j<ai[i+1]; j++ ) {
994         col = (baij->cstart+aj[j])*bs;
995         for (k=0; k<bs; k++ ) {
996           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
997           col++; a += bs;
998         }
999       }
1000     }
1001     /* copy over the B part */
1002     Aloc = (Mat_SeqBAIJ*) baij->B->data;
1003     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1004     for ( i=0; i<mbs; i++ ) {
1005       rvals[0] = bs*(baij->rstart + i);
1006       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1007       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1008         col = baij->garray[aj[j]]*bs;
1009         for (k=0; k<bs; k++ ) {
1010           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1011           col++; a += bs;
1012         }
1013       }
1014     }
1015     PetscFree(rvals);
1016     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1017     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1018     /*
1019        Everyone has to call to draw the matrix since the graphics waits are
1020        synchronized across all processors that share the Draw object
1021     */
1022     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
1023       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1024     }
1025     ierr = MatDestroy(A); CHKERRQ(ierr);
1026   }
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 
1031 
1032 #undef __FUNC__
1033 #define __FUNC__ "MatView_MPIBAIJ"
1034 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1035 {
1036   int         ierr;
1037   ViewerType  vtype;
1038 
1039   PetscFunctionBegin;
1040   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1041   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1042       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
1043     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
1044   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1045     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1046   } else {
1047     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1048   }
1049   PetscFunctionReturn(0);
1050 }
1051 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Decrements the reference count and, when it reaches
   zero, frees every data structure owned by the parallel BAIJ matrix:
   mappings, maps, stashes, the sequential A and B blocks, the column map,
   the scatter context, and assorted work arrays.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  /* matrix is still referenced elsewhere; nothing to free yet */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
  ierr = StashDestroy_Private(&baij->bstash); CHKERRQ(ierr);
  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
  if (baij->colmap) TableDelete(baij->colmap);
#else
  if (baij->colmap) PetscFree(baij->colmap);
#endif
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  /* finally destroy the Mat header itself */
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1099 
1100 #undef __FUNC__
1101 #define __FUNC__ "MatMult_MPIBAIJ"
1102 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1105   int         ierr, nt;
1106 
1107   PetscFunctionBegin;
1108   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1109   if (nt != a->n) {
1110     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1111   }
1112   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1113   if (nt != a->m) {
1114     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1115   }
1116   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1117   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1118   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1119   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1120   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 #undef __FUNC__
1125 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1126 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1127 {
1128   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1129   int        ierr;
1130 
1131   PetscFunctionBegin;
1132   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1133   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1134   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1135   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNC__
1140 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1141 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1142 {
1143   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1144   int         ierr;
1145 
1146   PetscFunctionBegin;
1147   /* do nondiagonal part */
1148   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1149   /* send it on its way */
1150   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1151   /* do local part */
1152   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1153   /* receive remote parts: note this assumes the values are not actually */
1154   /* inserted in yy until the next line, which is true for my implementation*/
1155   /* but is not perhaps always true. */
1156   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   MatMultTransAdd_MPIBAIJ - Computes zz = yy + A^T * xx, overlapping the
   reverse scatter of the off-diagonal contribution with the local
   transpose multiply-add.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part: lvec = B^T * xx */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part: zz = yy + A^T * xx */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1180 
1181 /*
1182   This only works correctly for square matrices where the subblock A->A is the
1183    diagonal block
1184 */
1185 #undef __FUNC__
1186 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1187 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1188 {
1189   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1190   int         ierr;
1191 
1192   PetscFunctionBegin;
1193   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1194   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNC__
1199 #define __FUNC__ "MatScale_MPIBAIJ"
1200 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1201 {
1202   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1203   int         ierr;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1207   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNC__
1212 #define __FUNC__ "MatGetSize_MPIBAIJ"
1213 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1214 {
1215   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1216 
1217   PetscFunctionBegin;
1218   if (m) *m = mat->M;
1219   if (n) *n = mat->N;
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 #undef __FUNC__
1224 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1225 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1226 {
1227   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1228 
1229   PetscFunctionBegin;
1230   *m = mat->m; *n = mat->n;
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNC__
1235 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1236 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1237 {
1238   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1239 
1240   PetscFunctionBegin;
1241   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1242   PetscFunctionReturn(0);
1243 }
1244 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned global point row of the
   matrix, merging the diagonal (A) and off-diagonal (B) contributions into
   a single list sorted by increasing global column.  The returned idx/v
   arrays point into per-matrix workspace and remain valid only until
   MatRestoreRow() is called; only one row may be active at a time.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* widest block row over A and B combined, in blocks */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and (after them) the indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request columns/values from A and B only if the caller wants them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal
         block; merge order is B[0..imark), all of A, B[imark..nzB) */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1326 
1327 #undef __FUNC__
1328 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1329 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1330 {
1331   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1332 
1333   PetscFunctionBegin;
1334   if (baij->getrowactive == PETSC_FALSE) {
1335     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1336   }
1337   baij->getrowactive = PETSC_FALSE;
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNC__
1342 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1343 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1344 {
1345   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1346 
1347   PetscFunctionBegin;
1348   *bs = baij->bs;
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNC__
1353 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1354 int MatZeroEntries_MPIBAIJ(Mat A)
1355 {
1356   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1357   int         ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1361   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNC__
1366 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1367 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1368 {
1369   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1370   Mat         A = a->A, B = a->B;
1371   int         ierr;
1372   double      isend[5], irecv[5];
1373 
1374   PetscFunctionBegin;
1375   info->block_size     = (double)a->bs;
1376   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1377   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1378   isend[3] = info->memory;  isend[4] = info->mallocs;
1379   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1380   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1381   isend[3] += info->memory;  isend[4] += info->mallocs;
1382   if (flag == MAT_LOCAL) {
1383     info->nz_used      = isend[0];
1384     info->nz_allocated = isend[1];
1385     info->nz_unneeded  = isend[2];
1386     info->memory       = isend[3];
1387     info->mallocs      = isend[4];
1388   } else if (flag == MAT_GLOBAL_MAX) {
1389     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1390     info->nz_used      = irecv[0];
1391     info->nz_allocated = irecv[1];
1392     info->nz_unneeded  = irecv[2];
1393     info->memory       = irecv[3];
1394     info->mallocs      = irecv[4];
1395   } else if (flag == MAT_GLOBAL_SUM) {
1396     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1397     info->nz_used      = irecv[0];
1398     info->nz_allocated = irecv[1];
1399     info->nz_unneeded  = irecv[2];
1400     info->memory       = irecv[3];
1401     info->mallocs      = irecv[4];
1402   } else {
1403     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1404   }
1405   info->rows_global       = (double)a->M;
1406   info->columns_global    = (double)a->N;
1407   info->rows_local        = (double)a->m;
1408   info->columns_local     = (double)a->N;
1409   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1410   info->fill_ratio_needed = 0;
1411   info->factor_mallocs    = 0;
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNC__
1416 #define __FUNC__ "MatSetOption_MPIBAIJ"
1417 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1418 {
1419   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1420   int         ierr;
1421 
1422   PetscFunctionBegin;
1423   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1424       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1425       op == MAT_COLUMNS_UNSORTED ||
1426       op == MAT_COLUMNS_SORTED ||
1427       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1428       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1429         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1430         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1431   } else if (op == MAT_ROW_ORIENTED) {
1432         a->roworiented = 1;
1433         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1434         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1435   } else if (op == MAT_ROWS_SORTED ||
1436              op == MAT_ROWS_UNSORTED ||
1437              op == MAT_SYMMETRIC ||
1438              op == MAT_STRUCTURALLY_SYMMETRIC ||
1439              op == MAT_YES_NEW_DIAGONALS ||
1440              op == MAT_USE_HASH_TABLE) {
1441     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1442   } else if (op == MAT_COLUMN_ORIENTED) {
1443     a->roworiented = 0;
1444     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1445     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1446   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1447     a->donotstash = 1;
1448   } else if (op == MAT_NO_NEW_DIAGONALS) {
1449     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1450   } else if (op == MAT_USE_HASH_TABLE) {
1451     a->ht_flag = 1;
1452   } else {
1453     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNC__
1459 #define __FUNC__ "MatTranspose_MPIBAIJ("
1460 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1461 {
1462   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1463   Mat_SeqBAIJ *Aloc;
1464   Mat        B;
1465   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1466   int        bs=baij->bs,mbs=baij->mbs;
1467   Scalar     *a;
1468 
1469   PetscFunctionBegin;
1470   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1471   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1472   CHKERRQ(ierr);
1473 
1474   /* copy over the A part */
1475   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1476   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1477   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1478 
1479   for ( i=0; i<mbs; i++ ) {
1480     rvals[0] = bs*(baij->rstart + i);
1481     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1482     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1483       col = (baij->cstart+aj[j])*bs;
1484       for (k=0; k<bs; k++ ) {
1485         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1486         col++; a += bs;
1487       }
1488     }
1489   }
1490   /* copy over the B part */
1491   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1492   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1493   for ( i=0; i<mbs; i++ ) {
1494     rvals[0] = bs*(baij->rstart + i);
1495     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1496     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1497       col = baij->garray[aj[j]]*bs;
1498       for (k=0; k<bs; k++ ) {
1499         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1500         col++; a += bs;
1501       }
1502     }
1503   }
1504   PetscFree(rvals);
1505   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1506   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1507 
1508   if (matout != PETSC_NULL) {
1509     *matout = B;
1510   } else {
1511     PetscOps *Abops;
1512     MatOps   Aops;
1513 
1514     /* This isn't really an in-place transpose .... but free data structures from baij */
1515     PetscFree(baij->rowners);
1516     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1517     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1518 #if defined (USE_CTABLE)
1519     if (baij->colmap) TableDelete(baij->colmap);
1520 #else
1521     if (baij->colmap) PetscFree(baij->colmap);
1522 #endif
1523     if (baij->garray) PetscFree(baij->garray);
1524     if (baij->lvec) VecDestroy(baij->lvec);
1525     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1526     PetscFree(baij);
1527 
1528     /*
1529        This is horrible, horrible code. We need to keep the
1530       A pointers for the bops and ops but copy everything
1531       else from C.
1532     */
1533     Abops = A->bops;
1534     Aops  = A->ops;
1535     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1536     A->bops = Abops;
1537     A->ops  = Aops;
1538 
1539     PetscHeaderDestroy(B);
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNC__
1545 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1546 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1547 {
1548   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1549   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1550   int ierr,s1,s2,s3;
1551 
1552   PetscFunctionBegin;
1553   if (ll)  {
1554     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1555     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1556     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1557     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1558     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1559   }
1560   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNC__
1565 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1566 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1567 {
1568   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1569   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1570   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1571   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1572   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1573   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1574   MPI_Comm       comm = A->comm;
1575   MPI_Request    *send_waits,*recv_waits;
1576   MPI_Status     recv_status,*send_status;
1577   IS             istmp;
1578 
1579   PetscFunctionBegin;
1580   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1581   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1582 
1583   /*  first count number of contributors to each processor */
1584   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1585   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1586   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1587   for ( i=0; i<N; i++ ) {
1588     idx = rows[i];
1589     found = 0;
1590     for ( j=0; j<size; j++ ) {
1591       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1592         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1593       }
1594     }
1595     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1596   }
1597   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1598 
1599   /* inform other processors of number of messages and max length*/
1600   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1601   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1602   nrecvs = work[rank];
1603   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1604   nmax   = work[rank];
1605   PetscFree(work);
1606 
1607   /* post receives:   */
1608   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1609   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1610   for ( i=0; i<nrecvs; i++ ) {
1611     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1612   }
1613 
1614   /* do sends:
1615      1) starts[i] gives the starting index in svalues for stuff going to
1616      the ith processor
1617   */
1618   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1619   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1620   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1621   starts[0] = 0;
1622   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1623   for ( i=0; i<N; i++ ) {
1624     svalues[starts[owner[i]]++] = rows[i];
1625   }
1626   ISRestoreIndices(is,&rows);
1627 
1628   starts[0] = 0;
1629   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1630   count = 0;
1631   for ( i=0; i<size; i++ ) {
1632     if (procs[i]) {
1633       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1634     }
1635   }
1636   PetscFree(starts);
1637 
1638   base = owners[rank]*bs;
1639 
1640   /*  wait on receives */
1641   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1642   source = lens + nrecvs;
1643   count  = nrecvs; slen = 0;
1644   while (count) {
1645     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1646     /* unpack receives into our local space */
1647     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1648     source[imdex]  = recv_status.MPI_SOURCE;
1649     lens[imdex]  = n;
1650     slen += n;
1651     count--;
1652   }
1653   PetscFree(recv_waits);
1654 
1655   /* move the data into the send scatter */
1656   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1657   count = 0;
1658   for ( i=0; i<nrecvs; i++ ) {
1659     values = rvalues + i*nmax;
1660     for ( j=0; j<lens[i]; j++ ) {
1661       lrows[count++] = values[j] - base;
1662     }
1663   }
1664   PetscFree(rvalues); PetscFree(lens);
1665   PetscFree(owner); PetscFree(nprocs);
1666 
1667   /* actually zap the local rows */
1668   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1669   PLogObjectParent(A,istmp);
1670 
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
1679   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1680   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1681   if (diag && (l->A->M == l->A->N)) {
1682     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1683   } else if (diag) {
1684     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1685     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1686       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1687 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1688     }
1689     for ( i = 0; i < slen; i++ ) {
1690       row  = lrows[i] + rstart_bs;
1691       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1692     }
1693     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   } else {
1696     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1697   }
1698 
1699   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1700   PetscFree(lrows);
1701 
1702   /* wait on sends */
1703   if (nsends) {
1704     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1705     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1706     PetscFree(send_status);
1707   }
1708   PetscFree(send_waits); PetscFree(svalues);
1709 
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNC__
1714 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1715 int MatPrintHelp_MPIBAIJ(Mat A)
1716 {
1717   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1718   MPI_Comm    comm = A->comm;
1719   static int  called = 0;
1720   int         ierr;
1721 
1722   PetscFunctionBegin;
1723   if (!a->rank) {
1724     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1725   }
1726   if (called) {PetscFunctionReturn(0);} else called = 1;
1727   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1728   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNC__
1733 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1734 int MatSetUnfactored_MPIBAIJ(Mat A)
1735 {
1736   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1737   int         ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1745 
/* -------------------------------------------------------------------*/
/*
   Virtual function table for the MPIBAIJ matrix type.  Entries are
   positional: each slot corresponds, in order, to a field of
   struct _MatOps (declared in the matrix implementation headers, not
   visible in this file — confirm slot meanings there).  A 0 marks an
   operation this format does not implement.  Do not reorder.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  0,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1814 
1815 
1816 EXTERN_C_BEGIN
1817 #undef __FUNC__
1818 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1819 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1820 {
1821   PetscFunctionBegin;
1822   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1823   *iscopy = PETSC_FALSE;
1824   PetscFunctionReturn(0);
1825 }
1826 EXTERN_C_END
1827 
1828 #undef __FUNC__
1829 #define __FUNC__ "MatCreateMPIBAIJ"
1830 /*@C
1831    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1832    (block compressed row).  For good matrix assembly performance
1833    the user should preallocate the matrix storage by setting the parameters
1834    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1835    performance can be increased by more than a factor of 50.
1836 
1837    Collective on MPI_Comm
1838 
1839    Input Parameters:
1840 +  comm - MPI communicator
.  bs   - size of block
1842 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1843            This value should be the same as the local size used in creating the
1844            y vector for the matrix-vector product y = Ax.
1845 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1846            This value should be the same as the local size used in creating the
1847            x vector for the matrix-vector product y = Ax.
1848 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1849 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1850 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1851            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1855 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1856            submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
1858            off-diagonal portion of the local submatrix (possibly different for
1859            each block row) or PETSC_NULL.
1860 
1861    Output Parameter:
1862 .  A - the matrix
1863 
1864    Options Database Keys:
1865 .   -mat_no_unroll - uses code that does not unroll the loops in the
1866                      block calculations (much slower)
1867 .   -mat_block_size - size of the blocks to use
1868 .   -mat_mpi - use the parallel matrix data structures even on one processor
1869                (defaults to using SeqBAIJ format on one processor)
1870 
1871    Notes:
1872    The user MUST specify either the local or global matrix dimensions
1873    (possibly both).
1874 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1877 
1878    Storage Information:
1879    For a square global matrix we define each processor's diagonal portion
1880    to be its local rows and the corresponding columns (a square submatrix);
1881    each processor's off-diagonal portion encompasses the remainder of the
1882    local matrix (a rectangular submatrix).
1883 
1884    The user can specify preallocated storage for the diagonal part of
1885    the local submatrix with either d_nz or d_nnz (not both).  Set
1886    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1887    memory allocation.  Likewise, specify preallocated storage for the
1888    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1889 
1890    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1891    the figure below we depict these three local rows and all columns (0-11).
1892 
1893 .vb
1894            0 1 2 3 4 5 6 7 8 9 10 11
1895           -------------------
1896    row 3  |  o o o d d d o o o o o o
1897    row 4  |  o o o d d d o o o o o o
1898    row 5  |  o o o d d d o o o o o o
1899           -------------------
1900 .ve
1901 
1902    Thus, any entries in the d locations are stored in the d (diagonal)
1903    submatrix, and any entries in the o locations are stored in the
1904    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1905    stored simply in the MATSEQBAIJ format for compressed row storage.
1906 
1907    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1908    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1909    In general, for PDE problems in which most nonzeros are near the diagonal,
1910    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1911    or you will get TERRIBLE performance; see the users' manual chapter on
1912    matrices.
1913 
1914    Level: intermediate
1915 
1916 .keywords: matrix, block, aij, compressed row, sparse, parallel
1917 
1918 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1919 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* On a single process (unless -mat_mpibaij or -mat_mpi forces the
     parallel format) fall through to the sequential BAIJ format; note
     that only the d_nz/d_nnz preallocation is used in that case. */
  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* Sanity checks on the size arguments: per-row preallocation requires a
     known local row count, and at least one of each local/global pair must
     be specified. */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* When both local and global sizes are given the local sizes win: the
     global size is discarded and recomputed from the locals below. */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* Global sizes, if needed, are the sum of the local sizes (collective). */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* Local sizes, if needed, split the global block count as evenly as
     possible, with the first (Mbs % size) ranks getting one extra block. */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* rowners[i] becomes the first block row owned by rank i (prefix sum) */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* A holds the m x n diagonal part, B the m x N off-diagonal part
     (B is later compacted by MatSetUpMultiply_MPIBAIJ) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashCreate_Private(comm,1,bs,&b->stash); CHKERRQ(ierr);
  ierr = StashCreate_Private(comm,bs,bs,&b->bstash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional hash-table assembly; factors <= 1.0 fall back to 1.39 */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2085 
2086 #undef __FUNC__
2087 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2088 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2089 {
2090   Mat         mat;
2091   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2092   int         ierr, len=0, flg;
2093 
2094   PetscFunctionBegin;
2095   *newmat       = 0;
2096   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2097   PLogObjectCreate(mat);
2098   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2099   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2100   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2101   mat->ops->view       = MatView_MPIBAIJ;
2102   mat->factor          = matin->factor;
2103   mat->assembled       = PETSC_TRUE;
2104   mat->insertmode      = NOT_SET_VALUES;
2105 
2106   a->m = mat->m   = oldmat->m;
2107   a->n = mat->n   = oldmat->n;
2108   a->M = mat->M   = oldmat->M;
2109   a->N = mat->N   = oldmat->N;
2110 
2111   a->bs  = oldmat->bs;
2112   a->bs2 = oldmat->bs2;
2113   a->mbs = oldmat->mbs;
2114   a->nbs = oldmat->nbs;
2115   a->Mbs = oldmat->Mbs;
2116   a->Nbs = oldmat->Nbs;
2117 
2118   a->rstart       = oldmat->rstart;
2119   a->rend         = oldmat->rend;
2120   a->cstart       = oldmat->cstart;
2121   a->cend         = oldmat->cend;
2122   a->size         = oldmat->size;
2123   a->rank         = oldmat->rank;
2124   a->donotstash   = oldmat->donotstash;
2125   a->roworiented  = oldmat->roworiented;
2126   a->rowindices   = 0;
2127   a->rowvalues    = 0;
2128   a->getrowactive = PETSC_FALSE;
2129   a->barray       = 0;
2130   a->rstart_bs    = oldmat->rstart_bs;
2131   a->rend_bs      = oldmat->rend_bs;
2132   a->cstart_bs    = oldmat->cstart_bs;
2133   a->cend_bs      = oldmat->cend_bs;
2134 
2135   /* hash table stuff */
2136   a->ht           = 0;
2137   a->hd           = 0;
2138   a->ht_size      = 0;
2139   a->ht_flag      = oldmat->ht_flag;
2140   a->ht_fact      = oldmat->ht_fact;
2141   a->ht_total_ct  = 0;
2142   a->ht_insert_ct = 0;
2143 
2144 
2145   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2146   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2147   a->cowners = a->rowners + a->size + 2;
2148   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2149   ierr = StashCreate_Private(matin->comm,1,oldmat->bs,&a->stash); CHKERRQ(ierr);
2150   ierr = StashCreate_Private(matin->comm,oldmat->bs,oldmat->bs,&a->bstash); CHKERRQ(ierr);
2151   if (oldmat->colmap) {
2152 #if defined (USE_CTABLE)
2153   ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
2154 #else
2155     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2156     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2157     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2158 #endif
2159   } else a->colmap = 0;
2160   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2161     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2162     PLogObjectMemory(mat,len*sizeof(int));
2163     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2164   } else a->garray = 0;
2165 
2166   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2167   PLogObjectParent(mat,a->lvec);
2168   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2169 
2170   PLogObjectParent(mat,a->Mvctx);
2171   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2172   PLogObjectParent(mat,a->A);
2173   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2174   PLogObjectParent(mat,a->B);
2175   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2176   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2177   if (flg) {
2178     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2179   }
2180   *newmat = mat;
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 #include "sys.h"
2185 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a matrix stored in PETSc binary format from a
   viewer into a new parallel MPIBAIJ matrix.

   Rank 0 reads the file (header, row lengths, column indices, values) and
   ships each rank its slice with MPI point-to-point messages; all ranks
   then compute their diagonal/off-diagonal preallocation and insert their
   rows.  The matrix is padded with identity rows so the global size is
   divisible by the block size (set with -matload_block_size, default 1).
   Only square matrices are supported.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* rank 0 reads and validates the file header, then broadcasts M, N */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows; browners[] is rowners[] scaled to
     scalar (non-block) row indices */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors (padding rows get length 1) */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already counted for the current block row,
     masked1/masked2 remember which entries to clear afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc: pad with 1.0 on the diagonal of the extra rows */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2413 
2414 
2415 
2416 #undef __FUNC__
2417 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2418 /*@
2419    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2420 
2421    Input Parameters:
2422 .  mat  - the matrix
2423 .  fact - factor
2424 
2425    Collective on Mat
2426 
2427   Notes:
2428    This can also be set by the command line option: -mat_use_hash_table fact
2429 
2430 .keywords: matrix, hashtable, factor, HT
2431 
2432 .seealso: MatSetOption()
2433 @*/
2434 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2435 {
2436   Mat_MPIBAIJ *baij;
2437 
2438   PetscFunctionBegin;
2439   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2440   if (mat->type != MATMPIBAIJ) {
2441       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2442   }
2443   baij = (Mat_MPIBAIJ*) mat->data;
2444   baij->ht_fact = fact;
2445   PetscFunctionReturn(0);
2446 }
2447