xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b833fc0d6de9ffb7984b704efec3d68e8cc57cab)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.162 1999/03/16 16:41:19 bsmith Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 /*
20      Local utility routine that creates a mapping from the global column
21    number to the local number in the off-diagonal part of the local
22    storage of the matrix.  This is done in a non-scalable way since the
23    length of colmap equals the global matrix length.
24 */
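/*
   Illustrative sketch (not part of the library, names local to this example):
   how the non-CTABLE colmap built below is used later in MatSetValues_MPIBAIJ().
   colmap[] is indexed by the global BLOCK column number and stores the local
   starting scalar column of that block within B, shifted by +1 so that 0 can
   mean "block column not present locally".
*/
static int ExampleColmapLookup(int *colmap,int gcol,int bs)
{
  int lstart = colmap[gcol/bs];     /* 1 + local scalar offset of the block, or 0 */
  if (!lstart) return -1;           /* block column not stored in the off-diagonal part */
  return lstart - 1 + gcol % bs;    /* same formula as colmap[in[j]/bs] - 1 + in[j]%bs below */
}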
25 #undef __FUNC__
26 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
27 static int CreateColmap_MPIBAIJ_Private(Mat mat)
28 {
29   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
30   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
31   int         nbs = B->nbs,i,bs=B->bs;
32 #if defined (USE_CTABLE)
33   int         ierr;
34 #endif
35 
36   PetscFunctionBegin;
37 #if defined (USE_CTABLE)
38   ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
39   for ( i=0; i<nbs; i++ ){
40     ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
41   }
42 #else
43   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
44   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
45   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
46   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
47 #endif
48   PetscFunctionReturn(0);
49 }
50 
51 #define CHUNKSIZE  10
52 
53 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
54 { \
55  \
56     brow = row/bs;  \
57     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
58     rmax = aimax[brow]; nrow = ailen[brow]; \
59       bcol = col/bs; \
60       ridx = row % bs; cidx = col % bs; \
61       low = 0; high = nrow; \
62       while (high-low > 3) { \
63         t = (low+high)/2; \
64         if (rp[t] > bcol) high = t; \
65         else              low  = t; \
66       } \
67       for ( _i=low; _i<high; _i++ ) { \
68         if (rp[_i] > bcol) break; \
69         if (rp[_i] == bcol) { \
70           bap  = ap +  bs2*_i + bs*cidx + ridx; \
71           if (addv == ADD_VALUES) *bap += value;  \
72           else                    *bap  = value;  \
73           goto a_noinsert; \
74         } \
75       } \
76       if (a->nonew == 1) goto a_noinsert; \
77       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
78       if (nrow >= rmax) { \
79         /* there is no extra room in row, therefore enlarge */ \
80         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
81         Scalar *new_a; \
82  \
83         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
84  \
85         /* malloc new storage space */ \
86         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
87         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
88         new_j   = (int *) (new_a + bs2*new_nz); \
89         new_i   = new_j + new_nz; \
90  \
91         /* copy over old data into new slots */ \
92         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
93         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
94         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
95         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
96         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
97                                                            len*sizeof(int)); \
98         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
99         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
100         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
101                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
102         /* free up old matrix storage */ \
103         PetscFree(a->a);  \
104         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
105         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
106         a->singlemalloc = 1; \
107  \
108         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
109         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
110         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
111         a->maxnz += bs2*CHUNKSIZE; \
112         a->reallocs++; \
113         a->nz++; \
114       } \
115       N = nrow++ - 1;  \
116       /* shift up all the later entries in this row */ \
117       for ( ii=N; ii>=_i; ii-- ) { \
118         rp[ii+1] = rp[ii]; \
119         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
120       } \
121       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
122       rp[_i]                      = bcol;  \
123       ap[bs2*_i + bs*cidx + ridx] = value;  \
124       a_noinsert:; \
125     ailen[brow] = nrow; \
126 }
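/*
   Descriptive note and illustrative sketch (the helper below is not used by
   the library): the macro above inserts one scalar at (row,col) into the
   diagonal block matrix A, which is stored in block compressed-row form.  It
   narrows the position with a short bisection loop, then scans linearly; if
   the block row has no free slot it reallocates the whole matrix storage with
   CHUNKSIZE extra block slots for that row.  The helper shows just the search
   step on a sorted array of block-column indices.
*/
static int ExampleFindBlockColumn(int *rp,int nrow,int bcol)
{
  int low = 0, high = nrow, t, i;
  while (high - low > 3) {          /* bisection, same stopping rule as the macro */
    t = (low + high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {        /* short linear scan over the remaining window */
    if (rp[i] == bcol) return i;    /* existing block found */
    if (rp[i] >  bcol) break;       /* insertion point is i */
  }
  return -(i+1);                    /* encode "insert before position i" */
}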
127 
128 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
129 { \
130  \
131     brow = row/bs;  \
132     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
133     rmax = bimax[brow]; nrow = bilen[brow]; \
134       bcol = col/bs; \
135       ridx = row % bs; cidx = col % bs; \
136       low = 0; high = nrow; \
137       while (high-low > 3) { \
138         t = (low+high)/2; \
139         if (rp[t] > bcol) high = t; \
140         else              low  = t; \
141       } \
142       for ( _i=low; _i<high; _i++ ) { \
143         if (rp[_i] > bcol) break; \
144         if (rp[_i] == bcol) { \
145           bap  = ap +  bs2*_i + bs*cidx + ridx; \
146           if (addv == ADD_VALUES) *bap += value;  \
147           else                    *bap  = value;  \
148           goto b_noinsert; \
149         } \
150       } \
151       if (b->nonew == 1) goto b_noinsert; \
152       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
153       if (nrow >= rmax) { \
154         /* there is no extra room in row, therefore enlarge */ \
155         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
156         Scalar *new_a; \
157  \
158         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
159  \
160         /* malloc new storage space */ \
161         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
162         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
163         new_j   = (int *) (new_a + bs2*new_nz); \
164         new_i   = new_j + new_nz; \
165  \
166         /* copy over old data into new slots */ \
167         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
168         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
169         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
170         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
171         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
172                                                            len*sizeof(int)); \
173         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
174         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
175         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
176                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
177         /* free up old matrix storage */ \
178         PetscFree(b->a);  \
179         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
180         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
181         b->singlemalloc = 1; \
182  \
183         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
184         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
185         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
186         b->maxnz += bs2*CHUNKSIZE; \
187         b->reallocs++; \
188         b->nz++; \
189       } \
190       N = nrow++ - 1;  \
191       /* shift up all the later entries in this row */ \
192       for ( ii=N; ii>=_i; ii-- ) { \
193         rp[ii+1] = rp[ii]; \
194         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
195       } \
196       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
197       rp[_i]                      = bcol;  \
198       ap[bs2*_i + bs*cidx + ridx] = value;  \
199       b_noinsert:; \
200     bilen[brow] = nrow; \
201 }
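/*
   MatSetValues_SeqBAIJ_B_Private above is the identical insertion scheme as
   MatSetValues_SeqBAIJ_A_Private, applied to the off-diagonal block matrix B
   (using bi/bj/ba/bimax/bilen in place of ai/aj/aa/aimax/ailen).
*/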
202 
203 #undef __FUNC__
204 #define __FUNC__ "MatSetValues_MPIBAIJ"
205 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
206 {
207   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
208   Scalar      value;
209   int         ierr,i,j,row,col;
210   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
211   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
212   int         cend_orig=baij->cend_bs,bs=baij->bs;
213 
214   /* Some Variables required in the macro */
215   Mat         A = baij->A;
216   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
217   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
218   Scalar      *aa=a->a;
219 
220   Mat         B = baij->B;
221   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
222   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
223   Scalar      *ba=b->a;
224 
225   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
226   int         low,high,t,ridx,cidx,bs2=a->bs2;
227   Scalar      *ap,*bap;
228 
229   PetscFunctionBegin;
230   for ( i=0; i<m; i++ ) {
231     if (im[i] < 0) continue;
232 #if defined(USE_PETSC_BOPT_g)
233     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
234 #endif
235     if (im[i] >= rstart_orig && im[i] < rend_orig) {
236       row = im[i] - rstart_orig;
237       for ( j=0; j<n; j++ ) {
238         if (in[j] >= cstart_orig && in[j] < cend_orig){
239           col = in[j] - cstart_orig;
240           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
241           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
242           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
243         } else if (in[j] < 0) continue;
244 #if defined(USE_PETSC_BOPT_g)
245         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
246 #endif
247         else {
248           if (mat->was_assembled) {
249             if (!baij->colmap) {
250               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
251             }
252 #if defined (USE_CTABLE)
253             ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
254             col  = col - 1 + in[j]%bs;
255 #else
256             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
257 #endif
258             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
259               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
260               col =  in[j];
261               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
262               B = baij->B;
263               b = (Mat_SeqBAIJ *) (B)->data;
264               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
265               ba=b->a;
266             }
267           } else col = in[j];
268           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
269           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
270           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
271         }
272       }
273     } else {
274       if ( !baij->donotstash) {
275         if (roworiented) {
276           ierr = StashValuesRoworiented_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
277         } else {
278           ierr = StashValuesColumnoriented_Private(&baij->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
279         }
280       }
281     }
282   }
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNC__
287 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
288 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
289 {
290   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
291   Scalar      *value,*barray=baij->barray;
292   int         ierr,i,j,ii,jj,row,col;
293   int         roworiented = baij->roworiented,rstart=baij->rstart ;
294   int         rend=baij->rend,cstart=baij->cstart,stepval;
295   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
296   PetscFunctionBegin;
297   if (!barray) {
298     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
299   }
300 
301   if (roworiented) {
302     stepval = (n-1)*bs;
303   } else {
304     stepval = (m-1)*bs;
305   }
306   for ( i=0; i<m; i++ ) {
307     if (im[i] < 0) continue;
308 #if defined(USE_PETSC_BOPT_g)
309     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
310 #endif
311     if (im[i] >= rstart && im[i] < rend) {
312       row = im[i] - rstart;
313       for ( j=0; j<n; j++ ) {
314         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block is already contiguous and a copy is not required */
315         if ((roworiented) && (n == 1)) {
316           barray = v + i*bs2;
317         } else if((!roworiented) && (m == 1)) {
318           barray = v + j*bs2;
319         } else { /* Here a copy is required */
320           if (roworiented) {
321             value = v + i*(stepval+bs)*bs + j*bs;
322           } else {
323             value = v + j*(stepval+bs)*bs + i*bs;
324           }
325           for ( ii=0; ii<bs; ii++,value+=stepval ) {
326             for (jj=0; jj<bs; jj++ ) {
327               *barray++  = *value++;
328             }
329           }
330           barray -=bs2;
331         }
332 
333         if (in[j] >= cstart && in[j] < cend){
334           col  = in[j] - cstart;
335           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
336         }
337         else if (in[j] < 0) continue;
338 #if defined(USE_PETSC_BOPT_g)
339         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
340 #endif
341         else {
342           if (mat->was_assembled) {
343             if (!baij->colmap) {
344               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
345             }
346 
347 #if defined(USE_PETSC_BOPT_g)
348 #if defined (USE_CTABLE)
349             { int data;
350             ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
351             if((data - 1) % bs)
352 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
353             }
354 #else
355             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
356 #endif
357 #endif
358 #if defined (USE_CTABLE)
359 	    ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
360             col  = (col - 1)/bs;
361 #else
362             col = (baij->colmap[in[j]] - 1)/bs;
363 #endif
364             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
365               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
366               col =  in[j];
367             }
368           }
369           else col = in[j];
370           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
371         }
372       }
373     } else {
374       if (!baij->donotstash) {
375         if (roworiented) {
376           ierr = StashValuesRoworientedBlocked_Private(
377                     &baij->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
378         } else {
379           ierr = StashValuesColumnorientedBlocked_Private(
380                     &baij->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
381         }
382       }
383     }
384   }
385   PetscFunctionReturn(0);
386 }
387 #define HASH_KEY 0.6180339887
388 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
389 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
390 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
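/*
   Illustrative use of the HASH macro (not part of the library): multiplicative
   hashing takes the fractional part of key*HASH_KEY (the golden ratio) and
   scales it by the table size, giving a slot in [0,size).  For example (values
   approximate), with size = 1000 and key = 12345, key*HASH_KEY is roughly
   7629.63, so the slot is roughly 629.
*/
static int ExampleHashSlot(int size,int key)
{
  double tmp;
  return HASH(size,key,tmp);   /* expands to the fractional-part computation above */
}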
391 #undef __FUNC__
392 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
393 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
394 {
395   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
396   int         ierr,i,j,row,col;
397   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
398   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
399   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
400   double      tmp;
401   Scalar      ** HD = baij->hd,value;
402 #if defined(USE_PETSC_BOPT_g)
403   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
404 #endif
405 
406   PetscFunctionBegin;
407 
408   for ( i=0; i<m; i++ ) {
409 #if defined(USE_PETSC_BOPT_g)
410     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
411     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
412 #endif
413     row = im[i];
414     if (row >= rstart_orig && row < rend_orig) {
415       for ( j=0; j<n; j++ ) {
416         col = in[j];
417         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
418         /* Look up in the hash table */
419         key = (row/bs)*Nbs+(col/bs)+1;
420         h1  = HASH(size,key,tmp);
421 
422 
423         idx = h1;
424 #if defined(USE_PETSC_BOPT_g)
425         insert_ct++;
426         total_ct++;
427         if (HT[idx] != key) {
428           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
429           if (idx == size) {
430             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
431             if (idx == h1) {
432               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
433             }
434           }
435         }
436 #else
437         if (HT[idx] != key) {
438           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
439           if (idx == size) {
440             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
441             if (idx == h1) {
442               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
443             }
444           }
445         }
446 #endif
447         /* A HASH table entry is found, so insert the values at the correct address */
448         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
449         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
450       }
451     } else {
452       if (!baij->donotstash) {
453         if (roworiented) {
454           ierr = StashValuesRoworiented_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
455         } else {
456           ierr = StashValuesColumnoriented_Private(&baij->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
457         }
458       }
459     }
460   }
461 #if defined(USE_PETSC_BOPT_g)
462   baij->ht_total_ct = total_ct;
463   baij->ht_insert_ct = insert_ct;
464 #endif
465   PetscFunctionReturn(0);
466 }
467 
468 #undef __FUNC__
469 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
470 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
471 {
472   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
473   int         ierr,i,j,ii,jj,row,col,k,l;
474   int         roworiented = baij->roworiented,rstart=baij->rstart ;
475   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
476   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
477   double      tmp;
478   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
479 #if defined(USE_PETSC_BOPT_g)
480   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
481 #endif
482 
483   PetscFunctionBegin;
484 
485   if (roworiented) {
486     stepval = (n-1)*bs;
487   } else {
488     stepval = (m-1)*bs;
489   }
490   for ( i=0; i<m; i++ ) {
491 #if defined(USE_PETSC_BOPT_g)
492     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
493     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
494 #endif
495     row   = im[i];
496     v_t   = v + i*bs2;
497     if (row >= rstart && row < rend) {
498       for ( j=0; j<n; j++ ) {
499         col = in[j];
500 
501         /* Look up in the hash table */
502         key = row*Nbs+col+1;
503         h1  = HASH(size,key,tmp);
504 
505         idx = h1;
506 #if defined(USE_PETSC_BOPT_g)
507         total_ct++;
508         insert_ct++;
509        if (HT[idx] != key) {
510           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
511           if (idx == size) {
512             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
513             if (idx == h1) {
514               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
515             }
516           }
517         }
518 #else
519         if (HT[idx] != key) {
520           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
521           if (idx == size) {
522             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
523             if (idx == h1) {
524               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
525             }
526           }
527         }
528 #endif
529         baij_a = HD[idx];
530         if (roworiented) {
531           /*value = v + i*(stepval+bs)*bs + j*bs;*/
532           /* value = v + (i*(stepval+bs)+j)*bs; */
533           value = v_t;
534           v_t  += bs;
535           if (addv == ADD_VALUES) {
536             for ( ii=0; ii<bs; ii++,value+=stepval) {
537               for ( jj=ii; jj<bs2; jj+=bs ) {
538                 baij_a[jj]  += *value++;
539               }
540             }
541           } else {
542             for ( ii=0; ii<bs; ii++,value+=stepval) {
543               for ( jj=ii; jj<bs2; jj+=bs ) {
544                 baij_a[jj]  = *value++;
545               }
546             }
547           }
548         } else {
549           value = v + j*(stepval+bs)*bs + i*bs;
550           if (addv == ADD_VALUES) {
551             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
552               for ( jj=0; jj<bs; jj++ ) {
553                 baij_a[jj]  += *value++;
554               }
555             }
556           } else {
557             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
558               for ( jj=0; jj<bs; jj++ ) {
559                 baij_a[jj]  = *value++;
560               }
561             }
562           }
563         }
564       }
565     } else {
566       if (!baij->donotstash) {
567         if (roworiented ) {
568           row   = im[i]*bs;
569           value = v + i*(stepval+bs)*bs;
570           for ( j=0; j<bs; j++,row++ ) {
571             for ( k=0; k<n; k++ ) {
572               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
573                 ierr = StashValuesRoworiented_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
574               }
575             }
576           }
577         } else {
578           for ( j=0; j<n; j++ ) {
579             value = v + j*(stepval+bs)*bs + i*bs;
580             col   = in[j]*bs;
581             for ( k=0; k<bs; k++,col++,value+=stepval) {
582               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
583                 ierr = StashValuesRoworiented_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
584               }
585             }
586           }
587         }
588       }
589     }
590   }
591 #if defined(USE_PETSC_BOPT_g)
592   baij->ht_total_ct = total_ct;
593   baij->ht_insert_ct = insert_ct;
594 #endif
595   PetscFunctionReturn(0);
596 }
597 
598 #undef __FUNC__
599 #define __FUNC__ "MatGetValues_MPIBAIJ"
600 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
601 {
602   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
603   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
604   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
605 
606   PetscFunctionBegin;
607   for ( i=0; i<m; i++ ) {
608     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
609     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
610     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
611       row = idxm[i] - bsrstart;
612       for ( j=0; j<n; j++ ) {
613         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
614         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
615         if (idxn[j] >= bscstart && idxn[j] < bscend){
616           col = idxn[j] - bscstart;
617           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
618         } else {
619           if (!baij->colmap) {
620             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
621           }
622 #if defined (USE_CTABLE)
623           ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
624           data --;
625 #else
626           data = baij->colmap[idxn[j]/bs]-1;
627 #endif
628           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
629           else {
630             col  = data + idxn[j]%bs;
631             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
632           }
633         }
634       }
635     } else {
636       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
637     }
638   }
639  PetscFunctionReturn(0);
640 }
641 
642 #undef __FUNC__
643 #define __FUNC__ "MatNorm_MPIBAIJ"
644 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
645 {
646   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
647   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
648   int        ierr, i,bs2=baij->bs2;
649   double     sum = 0.0;
650   Scalar     *v;
651 
652   PetscFunctionBegin;
653   if (baij->size == 1) {
654     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
655   } else {
656     if (type == NORM_FROBENIUS) {
657       v = amat->a;
658       for (i=0; i<amat->nz*bs2; i++ ) {
659 #if defined(USE_PETSC_COMPLEX)
660         sum += PetscReal(PetscConj(*v)*(*v)); v++;
661 #else
662         sum += (*v)*(*v); v++;
663 #endif
664       }
665       v = bmat->a;
666       for (i=0; i<bmat->nz*bs2; i++ ) {
667 #if defined(USE_PETSC_COMPLEX)
668         sum += PetscReal(PetscConj(*v)*(*v)); v++;
669 #else
670         sum += (*v)*(*v); v++;
671 #endif
672       }
673       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
674       *norm = sqrt(*norm);
675     } else {
676       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
677     }
678   }
679   PetscFunctionReturn(0);
680 }
681 
682 
683 /*
684   Creates the hash table and fills it in.
685   This table is created only once.
686   If new entries need to be added to the matrix,
687   then the hash table has to be destroyed and
688   recreated.
689 */
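/*
   Illustrative sketch of the probing scheme used by the hash-table paths
   (MatCreateHashTable_MPIBAIJ_Private and MatSetValues*_MPIBAIJ_HT); the
   helper name and flat arrays are local to this example.  Keys are
   key = (global block row)*Nbs + (global block col) + 1, so 0 marks an empty
   slot; collisions are resolved by linear probing, wrapping at the table end.
*/
static int ExampleHashTableFind(int *HT,int size,int key)
{
  double tmp;
  int    h1 = HASH(size,key,tmp),idx;
  for (idx=h1; idx<size; idx++) if (HT[idx] == key) return idx;  /* probe to the end */
  for (idx=0; idx<h1; idx++)    if (HT[idx] == key) return idx;  /* wrap around */
  return -1;                                                     /* not in the table */
}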
690 #undef __FUNC__
691 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
692 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
693 {
694   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
695   Mat         A = baij->A, B=baij->B;
696   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
697   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
698   int         size,bs2=baij->bs2,rstart=baij->rstart;
699   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
700   int         *HT,key;
701   Scalar      **HD;
702   double      tmp;
703 #if defined(USE_PETSC_BOPT_g)
704   int         ct=0,max=0;
705 #endif
706 
707   PetscFunctionBegin;
708   baij->ht_size=(int)(factor*nz);
709   size = baij->ht_size;
710 
711   if (baij->ht) {
712     PetscFunctionReturn(0);
713   }
714 
715   /* Allocate Memory for Hash Table */
716   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
717   baij->ht = (int*)(baij->hd + size);
718   HD = baij->hd;
719   HT = baij->ht;
720 
721 
722   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
723 
724 
725   /* Loop Over A */
726   for ( i=0; i<a->mbs; i++ ) {
727     for ( j=ai[i]; j<ai[i+1]; j++ ) {
728       row = i+rstart;
729       col = aj[j]+cstart;
730 
731       key = row*Nbs + col + 1;
732       h1  = HASH(size,key,tmp);
733       for ( k=0; k<size; k++ ){
734         if (HT[(h1+k)%size] == 0) {
735           HT[(h1+k)%size] = key;
736           HD[(h1+k)%size] = a->a + j*bs2;
737           break;
738 #if defined(USE_PETSC_BOPT_g)
739         } else {
740           ct++;
741 #endif
742         }
743       }
744 #if defined(USE_PETSC_BOPT_g)
745       if (k> max) max = k;
746 #endif
747     }
748   }
749   /* Loop Over B */
750   for ( i=0; i<b->mbs; i++ ) {
751     for ( j=bi[i]; j<bi[i+1]; j++ ) {
752       row = i+rstart;
753       col = garray[bj[j]];
754       key = row*Nbs + col + 1;
755       h1  = HASH(size,key,tmp);
756       for ( k=0; k<size; k++ ){
757         if (HT[(h1+k)%size] == 0) {
758           HT[(h1+k)%size] = key;
759           HD[(h1+k)%size] = b->a + j*bs2;
760           break;
761 #if defined(USE_PETSC_BOPT_g)
762         } else {
763           ct++;
764 #endif
765         }
766       }
767 #if defined(USE_PETSC_BOPT_g)
768       if (k> max) max = k;
769 #endif
770     }
771   }
772 
773   /* Print Summary */
774 #if defined(USE_PETSC_BOPT_g)
775   for ( i=0,j=0; i<size; i++)
776     if (HT[i]) {j++;}
777   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
778            (j== 0)? 0.0:((double)(ct+j))/j,max);
779 #endif
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNC__
784 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
785 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
786 {
787   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
788   int         ierr,nstash,reallocs;
789   InsertMode  addv;
790 
791   PetscFunctionBegin;
792   if (baij->donotstash) {
793     PetscFunctionReturn(0);
794   }
795 
796   /* make sure all processors are either in INSERTMODE or ADDMODE */
797   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
798   if (addv == (ADD_VALUES|INSERT_VALUES)) {
799     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
800   }
801   mat->insertmode = addv; /* in case this processor had no cache */
802 
803   ierr =  StashScatterBegin_Private(&baij->stash,baij->rowners_bs); CHKERRQ(ierr);
804   ierr =  StashScatterBegin_Private(&baij->bstash,baij->rowners); CHKERRQ(ierr);
805   ierr = StashGetInfo_Private(&baij->stash,&nstash,&reallocs); CHKERRQ(ierr);
806   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",
807            nstash,reallocs);
808   ierr = StashGetInfo_Private(&baij->bstash,&nstash,&reallocs); CHKERRQ(ierr);
809   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",
810            nstash,reallocs);
811   PetscFunctionReturn(0);
812 }
813 
814 #undef __FUNC__
815 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
816 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
817 {
818   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
819   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
820   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
821   int         *row,*col,other_disassembled,r1,r2,r3;
822   Scalar      *val;
823   InsertMode  addv = mat->insertmode;
824 
825   PetscFunctionBegin;
826   if (!baij->donotstash) {
827     while (1) {
828       ierr = StashScatterGetMesg_Private(&baij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
829       if (!flg) break;
830 
831       for ( i=0; i<n; ) {
832         /* Now identify the consecutive vals belonging to the same row */
833         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
834         if (j < n) ncols = j-i;
835         else       ncols = n-i;
836         /* Now assemble all these values with a single function call */
837         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
838         i = j;
839       }
840     }
841     ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr);
842     /* Now process the block-stash. Since the values are stashed column-oriented,
843        set the roworiented flag to column oriented, and after MatSetValues()
844        restore the original flags */
845     r1 = baij->roworiented;
846     r2 = a->roworiented;
847     r3 = b->roworiented;
848     baij->roworiented = 0;
849     a->roworiented    = 0;
850     b->roworiented    = 0;
851     while (1) {
852       ierr = StashScatterGetMesg_Private(&baij->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
853       if (!flg) break;
854 
855       for ( i=0; i<n; ) {
856         /* Now identify the consecutive vals belonging to the same row */
857         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
858         if (j < n) ncols = j-i;
859         else       ncols = n-i;
860         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
861         i = j;
862       }
863     }
864     ierr = StashScatterEnd_Private(&baij->bstash); CHKERRQ(ierr);
865     baij->roworiented = r1;
866     a->roworiented    = r2;
867     b->roworiented    = r3;
868   }
869 
870   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
871   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
872 
873   /* determine if any processor has disassembled; if so, we must
874      also disassemble ourselves, so that we can reassemble. */
875   /*
876      if the nonzero structure of submatrix B cannot change, then we know that
877      no processor disassembled, and thus we can skip this step
878   */
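  /*
     mat->was_assembled is 0 or 1 on each process, so the MPI_PROD reduction
     below yields 1 only if every process still has was_assembled set; a result
     of 0 on a process that was assembled indicates that some other process has
     disassembled, in which case this process disassembles too so that all
     processes can reassemble consistently.
  */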
879   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
880     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
881     if (mat->was_assembled && !other_disassembled) {
882       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
883     }
884   }
885 
886   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
887     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
888   }
889   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
890   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
891 
892 #if defined(USE_PETSC_BOPT_g)
893   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
894     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
895              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
896     baij->ht_total_ct  = 0;
897     baij->ht_insert_ct = 0;
898   }
899 #endif
900   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
901     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
902     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
903     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
904   }
905 
906   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
907   PetscFunctionReturn(0);
908 }
909 
910 #undef __FUNC__
911 #define __FUNC__ "MatView_MPIBAIJ_Binary"
912 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
913 {
914   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
915   int          ierr;
916 
917   PetscFunctionBegin;
918   if (baij->size == 1) {
919     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
920   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNC__
925 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
926 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
927 {
928   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
929   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
930   FILE         *fd;
931   ViewerType   vtype;
932 
933   PetscFunctionBegin;
934   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
935   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
936     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
937     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
938       MatInfo info;
939       MPI_Comm_rank(mat->comm,&rank);
940       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
941       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
942       PetscSequentialPhaseBegin(mat->comm,1);
943       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
944               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
945               baij->bs,(int)info.memory);
946       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
947       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
948       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
949       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
950       fflush(fd);
951       PetscSequentialPhaseEnd(mat->comm,1);
952       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
953       PetscFunctionReturn(0);
954     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
955       PetscPrintf(mat->comm,"  block size is %d\n",bs);
956       PetscFunctionReturn(0);
957     }
958   }
959 
960   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
961     Draw       draw;
962     PetscTruth isnull;
963     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
964     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
965   }
966 
967   if (size == 1) {
968     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
969   } else {
970     /* assemble the entire matrix onto first processor. */
971     Mat         A;
972     Mat_SeqBAIJ *Aloc;
973     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
974     int         mbs = baij->mbs;
975     Scalar      *a;
976 
977     if (!rank) {
978       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
979     } else {
980       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
981     }
982     PLogObjectParent(mat,A);
983 
984     /* copy over the A part */
985     Aloc = (Mat_SeqBAIJ*) baij->A->data;
986     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
987     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
988 
989     for ( i=0; i<mbs; i++ ) {
990       rvals[0] = bs*(baij->rstart + i);
991       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
992       for ( j=ai[i]; j<ai[i+1]; j++ ) {
993         col = (baij->cstart+aj[j])*bs;
994         for (k=0; k<bs; k++ ) {
995           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
996           col++; a += bs;
997         }
998       }
999     }
1000     /* copy over the B part */
1001     Aloc = (Mat_SeqBAIJ*) baij->B->data;
1002     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1003     for ( i=0; i<mbs; i++ ) {
1004       rvals[0] = bs*(baij->rstart + i);
1005       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1006       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1007         col = baij->garray[aj[j]]*bs;
1008         for (k=0; k<bs; k++ ) {
1009           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1010           col++; a += bs;
1011         }
1012       }
1013     }
1014     PetscFree(rvals);
1015     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1016     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1017     /*
1018        Everyone has to call to draw the matrix since the graphics waits are
1019        synchronized across all processors that share the Draw object
1020     */
1021     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
1022       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1023     }
1024     ierr = MatDestroy(A); CHKERRQ(ierr);
1025   }
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 
1030 
1031 #undef __FUNC__
1032 #define __FUNC__ "MatView_MPIBAIJ"
1033 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1034 {
1035   int         ierr;
1036   ViewerType  vtype;
1037 
1038   PetscFunctionBegin;
1039   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1040   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1041       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
1042     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
1043   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1044     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1045   } else {
1046     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNC__
1052 #define __FUNC__ "MatDestroy_MPIBAIJ"
1053 int MatDestroy_MPIBAIJ(Mat mat)
1054 {
1055   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1056   int         ierr;
1057 
1058   PetscFunctionBegin;
1059   if (--mat->refct > 0) PetscFunctionReturn(0);
1060 
1061   if (mat->mapping) {
1062     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
1063   }
1064   if (mat->bmapping) {
1065     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
1066   }
1067   if (mat->rmap) {
1068     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1069   }
1070   if (mat->cmap) {
1071     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1072   }
1073 #if defined(USE_PETSC_LOG)
1074   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1075 #endif
1076 
1077   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1078   ierr = StashDestroy_Private(&baij->bstash); CHKERRQ(ierr);
1079   PetscFree(baij->rowners);
1080   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1081   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1082 #if defined (USE_CTABLE)
1083   if (baij->colmap) TableDelete(baij->colmap);
1084 #else
1085   if (baij->colmap) PetscFree(baij->colmap);
1086 #endif
1087   if (baij->garray) PetscFree(baij->garray);
1088   if (baij->lvec)   VecDestroy(baij->lvec);
1089   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1090   if (baij->rowvalues) PetscFree(baij->rowvalues);
1091   if (baij->barray) PetscFree(baij->barray);
1092   if (baij->hd) PetscFree(baij->hd);
1093   PetscFree(baij);
1094   PLogObjectDestroy(mat);
1095   PetscHeaderDestroy(mat);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNC__
1100 #define __FUNC__ "MatMult_MPIBAIJ"
1101 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1102 {
1103   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1104   int         ierr, nt;
1105 
1106   PetscFunctionBegin;
1107   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1108   if (nt != a->n) {
1109     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1110   }
1111   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1112   if (nt != a->m) {
1113     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
1114   }
1115   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1116   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1117   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1118   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1119   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNC__
1124 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1125 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1126 {
1127   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1128   int        ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1132   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1133   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1134   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNC__
1139 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1140 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1141 {
1142   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1143   int         ierr;
1144 
1145   PetscFunctionBegin;
1146   /* do nondiagonal part */
1147   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1148   /* send it on its way */
1149   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1150   /* do local part */
1151   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1152   /* receive remote parts: note this assumes the values are not actually */
1153   /* inserted in yy until the next line, which is true for my implementation*/
1154   /* but may not always be true. */
1155   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNC__
1160 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1161 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1162 {
1163   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1164   int         ierr;
1165 
1166   PetscFunctionBegin;
1167   /* do nondiagonal part */
1168   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1169   /* send it on its way */
1170   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1171   /* do local part */
1172   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1173   /* receive remote parts: note this assumes the values are not actually */
1174   /* inserted in zz until the next line, which is true for my implementation*/
1175   /* but may not always be true. */
1176   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 /*
1181   This only works correctly for square matrices where the subblock A->A is the
1182    diagonal block
1183 */
1184 #undef __FUNC__
1185 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1186 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1187 {
1188   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1189   int         ierr;
1190 
1191   PetscFunctionBegin;
1192   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1193   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNC__
1198 #define __FUNC__ "MatScale_MPIBAIJ"
1199 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1200 {
1201   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1202   int         ierr;
1203 
1204   PetscFunctionBegin;
1205   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1206   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNC__
1211 #define __FUNC__ "MatGetSize_MPIBAIJ"
1212 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1213 {
1214   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1215 
1216   PetscFunctionBegin;
1217   if (m) *m = mat->M;
1218   if (n) *n = mat->N;
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 #undef __FUNC__
1223 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1224 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1225 {
1226   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1227 
1228   PetscFunctionBegin;
1229   *m = mat->m; *n = mat->n;
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNC__
1234 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1235 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1236 {
1237   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1238 
1239   PetscFunctionBegin;
1240   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNC__
1245 #define __FUNC__ "MatGetRow_MPIBAIJ"
1246 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1247 {
1248   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1249   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1250   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1251   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1252   int        *cmap, *idx_p,cstart = mat->cstart;
1253 
1254   PetscFunctionBegin;
1255   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1256   mat->getrowactive = PETSC_TRUE;
1257 
1258   if (!mat->rowvalues && (idx || v)) {
1259     /*
1260         allocate enough space to hold information from the longest row.
1261     */
1262     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1263     int     max = 1,mbs = mat->mbs,tmp;
1264     for ( i=0; i<mbs; i++ ) {
1265       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1266       if (max < tmp) { max = tmp; }
1267     }
1268     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1269     CHKPTRQ(mat->rowvalues);
1270     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1271   }
1272 
1273   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows");
1274   lrow = row - brstart;
1275 
1276   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1277   if (!v)   {pvA = 0; pvB = 0;}
1278   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1279   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1280   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1281   nztot = nzA + nzB;
1282 
1283   cmap  = mat->garray;
1284   if (v  || idx) {
1285     if (nztot) {
1286       /* Sort by increasing column numbers, assuming A and B already sorted */
1287       int imark = -1;
1288       if (v) {
1289         *v = v_p = mat->rowvalues;
1290         for ( i=0; i<nzB; i++ ) {
1291           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1292           else break;
1293         }
1294         imark = i;
1295         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1296         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1297       }
1298       if (idx) {
1299         *idx = idx_p = mat->rowindices;
1300         if (imark > -1) {
1301           for ( i=0; i<imark; i++ ) {
1302             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1303           }
1304         } else {
1305           for ( i=0; i<nzB; i++ ) {
1306             if (cmap[cworkB[i]/bs] < cstart)
1307               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1308             else break;
1309           }
1310           imark = i;
1311         }
1312         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1313         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1314       }
1315     } else {
1316       if (idx) *idx = 0;
1317       if (v)   *v   = 0;
1318     }
1319   }
1320   *nz = nztot;
1321   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1322   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNC__
1327 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1328 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1329 {
1330   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1331 
1332   PetscFunctionBegin;
1333   if (baij->getrowactive == PETSC_FALSE) {
1334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1335   }
1336   baij->getrowactive = PETSC_FALSE;
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNC__
1341 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1342 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1343 {
1344   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1345 
1346   PetscFunctionBegin;
1347   *bs = baij->bs;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNC__
1352 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1353 int MatZeroEntries_MPIBAIJ(Mat A)
1354 {
1355   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1356   int         ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1360   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNC__
1365 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1366 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1367 {
1368   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1369   Mat         A = a->A, B = a->B;
1370   int         ierr;
1371   double      isend[5], irecv[5];
1372 
1373   PetscFunctionBegin;
1374   info->block_size     = (double)a->bs;
1375   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1376   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1377   isend[3] = info->memory;  isend[4] = info->mallocs;
1378   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1379   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1380   isend[3] += info->memory;  isend[4] += info->mallocs;
1381   if (flag == MAT_LOCAL) {
1382     info->nz_used      = isend[0];
1383     info->nz_allocated = isend[1];
1384     info->nz_unneeded  = isend[2];
1385     info->memory       = isend[3];
1386     info->mallocs      = isend[4];
1387   } else if (flag == MAT_GLOBAL_MAX) {
1388     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1389     info->nz_used      = irecv[0];
1390     info->nz_allocated = irecv[1];
1391     info->nz_unneeded  = irecv[2];
1392     info->memory       = irecv[3];
1393     info->mallocs      = irecv[4];
1394   } else if (flag == MAT_GLOBAL_SUM) {
1395     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1396     info->nz_used      = irecv[0];
1397     info->nz_allocated = irecv[1];
1398     info->nz_unneeded  = irecv[2];
1399     info->memory       = irecv[3];
1400     info->mallocs      = irecv[4];
1401   } else {
1402     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1403   }
1404   info->rows_global       = (double)a->M;
1405   info->columns_global    = (double)a->N;
1406   info->rows_local        = (double)a->m;
1407   info->columns_local     = (double)a->n;
1408   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1409   info->fill_ratio_needed = 0;
1410   info->factor_mallocs    = 0;
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNC__
1415 #define __FUNC__ "MatSetOption_MPIBAIJ"
1416 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1417 {
1418   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1419   int         ierr;
1420 
1421   PetscFunctionBegin;
1422   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1423       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1424       op == MAT_COLUMNS_UNSORTED ||
1425       op == MAT_COLUMNS_SORTED ||
1426       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1427       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1428         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1429         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1430   } else if (op == MAT_ROW_ORIENTED) {
1431         a->roworiented = 1;
1432         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1433         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1434   } else if (op == MAT_ROWS_SORTED ||
1435              op == MAT_ROWS_UNSORTED ||
1436              op == MAT_SYMMETRIC ||
1437              op == MAT_STRUCTURALLY_SYMMETRIC ||
1438              op == MAT_YES_NEW_DIAGONALS) {
1439     /* note: MAT_USE_HASH_TABLE is handled below and therefore is not ignored here */
1440     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1441   } else if (op == MAT_COLUMN_ORIENTED) {
1442     a->roworiented = 0;
1443     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1444     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1445   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1446     a->donotstash = 1;
1447   } else if (op == MAT_NO_NEW_DIAGONALS) {
1448     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1449   } else if (op == MAT_USE_HASH_TABLE) {
1450     a->ht_flag = 1;
1451   } else {
1452     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNC__
1458 #define __FUNC__ "MatTranspose_MPIBAIJ"
1459 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1460 {
1461   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1462   Mat_SeqBAIJ *Aloc;
1463   Mat        B;
1464   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1465   int        bs=baij->bs,mbs=baij->mbs;
1466   Scalar     *a;
1467 
1468   PetscFunctionBegin;
1469   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1470   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1471   CHKERRQ(ierr);
1472 
1473   /* copy over the A part */
1474   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1475   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1476   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1477 
1478   for ( i=0; i<mbs; i++ ) {
1479     rvals[0] = bs*(baij->rstart + i);
1480     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1481     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1482       col = (baij->cstart+aj[j])*bs;
1483       for (k=0; k<bs; k++ ) {
1484         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1485         col++; a += bs;
1486       }
1487     }
1488   }
1489   /* copy over the B part */
1490   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1491   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1492   for ( i=0; i<mbs; i++ ) {
1493     rvals[0] = bs*(baij->rstart + i);
1494     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1495     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1496       col = baij->garray[aj[j]]*bs;
1497       for (k=0; k<bs; k++ ) {
1498         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1499         col++; a += bs;
1500       }
1501     }
1502   }
1503   PetscFree(rvals);
1504   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1505   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1506 
1507   if (matout != PETSC_NULL) {
1508     *matout = B;
1509   } else {
1510     PetscOps *Abops;
1511     MatOps   Aops;
1512 
1513     /* This isn't really an in-place transpose, but we free the old data structures from baij */
1514     PetscFree(baij->rowners);
1515     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1516     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1517 #if defined (USE_CTABLE)
1518     if (baij->colmap) TableDelete(baij->colmap);
1519 #else
1520     if (baij->colmap) PetscFree(baij->colmap);
1521 #endif
1522     if (baij->garray) PetscFree(baij->garray);
1523     if (baij->lvec) VecDestroy(baij->lvec);
1524     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1525     PetscFree(baij);
1526 
1527     /*
1528        This is horrible, horrible code. We need to keep the
1529       A pointers for the bops and ops but copy everything
1530       else from B.
1531     */
1532     Abops = A->bops;
1533     Aops  = A->ops;
1534     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1535     A->bops = Abops;
1536     A->ops  = Aops;
1537 
1538     PetscHeaderDestroy(B);
1539   }
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNC__
1544 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1545 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1546 {
1547   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1548   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1549   int ierr,s1,s2,s3;
1550 
1551   PetscFunctionBegin;
1552   if (ll)  {
1553     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1554     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1555     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1556     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1557     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1558   }
1559   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 #undef __FUNC__
1564 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1565 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1566 {
1567   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1568   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1569   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1570   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1571   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1572   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1573   MPI_Comm       comm = A->comm;
1574   MPI_Request    *send_waits,*recv_waits;
1575   MPI_Status     recv_status,*send_status;
1576   IS             istmp;
1577 
1578   PetscFunctionBegin;
1579   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1580   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1581 
1582   /*  first count number of contributors to each processor */
1583   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1584   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1585   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1586   for ( i=0; i<N; i++ ) {
1587     idx = rows[i];
1588     found = 0;
1589     for ( j=0; j<size; j++ ) {
1590       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1591         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1592       }
1593     }
1594     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1595   }
1596   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1597 
1598   /* inform other processors of number of messages and max length*/
1599   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1600   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1601   nrecvs = work[rank];
1602   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1603   nmax   = work[rank];
1604   PetscFree(work);
1605 
1606   /* post receives:   */
1607   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1608   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1609   for ( i=0; i<nrecvs; i++ ) {
1610     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1611   }
1612 
1613   /* do sends:
1614      1) starts[i] gives the starting index in svalues for stuff going to
1615      the ith processor
1616   */
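  /*
     Illustrative example (made-up counts): with three processes and
     nprocs = {2,0,3}, the prefix sum below gives starts = {0,2,2}, so the
     two rows destined for process 0 are packed into svalues[0] and svalues[1],
     and the three rows destined for process 2 into svalues[2..4].
  */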
1617   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1618   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1619   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1620   starts[0] = 0;
1621   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1622   for ( i=0; i<N; i++ ) {
1623     svalues[starts[owner[i]]++] = rows[i];
1624   }
1625   ISRestoreIndices(is,&rows);
1626 
1627   starts[0] = 0;
1628   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1629   count = 0;
1630   for ( i=0; i<size; i++ ) {
1631     if (procs[i]) {
1632       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1633     }
1634   }
1635   PetscFree(starts);
1636 
1637   base = owners[rank]*bs;
1638 
1639   /*  wait on receives */
1640   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1641   source = lens + nrecvs;
1642   count  = nrecvs; slen = 0;
1643   while (count) {
1644     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1645     /* unpack receives into our local space */
1646     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1647     source[imdex]  = recv_status.MPI_SOURCE;
1648     lens[imdex]  = n;
1649     slen += n;
1650     count--;
1651   }
1652   PetscFree(recv_waits);
1653 
1654   /* move the data into the send scatter */
1655   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1656   count = 0;
1657   for ( i=0; i<nrecvs; i++ ) {
1658     values = rvalues + i*nmax;
1659     for ( j=0; j<lens[i]; j++ ) {
1660       lrows[count++] = values[j] - base;
1661     }
1662   }
1663   PetscFree(rvalues); PetscFree(lens);
1664   PetscFree(owner); PetscFree(nprocs);
1665 
1666   /* actually zap the local rows */
1667   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1668   PLogObjectParent(A,istmp);
1669 
1670   /*
1671         Zero the required rows. If the "diagonal block" of the matrix
1672      is square and the user wishes to set the diagonal we use separate
1673      code so that MatSetValues() is not called for each diagonal entry,
1674      which would allocate new memory and cause many mallocs, slowing things down.
1675 
1676        Contributed by: Matthew Knepley
1677   */
1678   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1679   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1680   if (diag && (l->A->M == l->A->N)) {
1681     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1682   } else if (diag) {
1683     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1684     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1685       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1686 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1687     }
1688     for ( i = 0; i < slen; i++ ) {
1689       row  = lrows[i] + rstart_bs;
1690       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1691     }
1692     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1694   } else {
1695     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1696   }
1697 
1698   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1699   PetscFree(lrows);
1700 
1701   /* wait on sends */
1702   if (nsends) {
1703     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1704     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1705     PetscFree(send_status);
1706   }
1707   PetscFree(send_waits); PetscFree(svalues);
1708 
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNC__
1713 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1714 int MatPrintHelp_MPIBAIJ(Mat A)
1715 {
1716   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1717   MPI_Comm    comm = A->comm;
1718   static int  called = 0;
1719   int         ierr;
1720 
1721   PetscFunctionBegin;
1722   if (!a->rank) {
1723     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1724   }
1725   if (called) {PetscFunctionReturn(0);} else called = 1;
1726   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1727   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNC__
1732 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1733 int MatSetUnfactored_MPIBAIJ(Mat A)
1734 {
1735   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1736   int         ierr;
1737 
1738   PetscFunctionBegin;
1739   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1744 
1745 /* -------------------------------------------------------------------*/
1746 static struct _MatOps MatOps_Values = {
1747   MatSetValues_MPIBAIJ,
1748   MatGetRow_MPIBAIJ,
1749   MatRestoreRow_MPIBAIJ,
1750   MatMult_MPIBAIJ,
1751   MatMultAdd_MPIBAIJ,
1752   MatMultTrans_MPIBAIJ,
1753   MatMultTransAdd_MPIBAIJ,
1754   0,
1755   0,
1756   0,
1757   0,
1758   0,
1759   0,
1760   0,
1761   MatTranspose_MPIBAIJ,
1762   MatGetInfo_MPIBAIJ,
1763   0,
1764   MatGetDiagonal_MPIBAIJ,
1765   MatDiagonalScale_MPIBAIJ,
1766   MatNorm_MPIBAIJ,
1767   MatAssemblyBegin_MPIBAIJ,
1768   MatAssemblyEnd_MPIBAIJ,
1769   0,
1770   MatSetOption_MPIBAIJ,
1771   MatZeroEntries_MPIBAIJ,
1772   MatZeroRows_MPIBAIJ,
1773   0,
1774   0,
1775   0,
1776   0,
1777   MatGetSize_MPIBAIJ,
1778   MatGetLocalSize_MPIBAIJ,
1779   MatGetOwnershipRange_MPIBAIJ,
1780   0,
1781   0,
1782   0,
1783   0,
1784   MatDuplicate_MPIBAIJ,
1785   0,
1786   0,
1787   0,
1788   0,
1789   0,
1790   MatGetSubMatrices_MPIBAIJ,
1791   MatIncreaseOverlap_MPIBAIJ,
1792   MatGetValues_MPIBAIJ,
1793   0,
1794   MatPrintHelp_MPIBAIJ,
1795   MatScale_MPIBAIJ,
1796   0,
1797   0,
1798   0,
1799   MatGetBlockSize_MPIBAIJ,
1800   0,
1801   0,
1802   0,
1803   0,
1804   0,
1805   0,
1806   MatSetUnfactored_MPIBAIJ,
1807   0,
1808   MatSetValuesBlocked_MPIBAIJ,
1809   0,
1810   0,
1811   0,
1812   MatGetMaps_Petsc};
1813 
1814 
1815 EXTERN_C_BEGIN
1816 #undef __FUNC__
1817 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1818 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1819 {
1820   PetscFunctionBegin;
1821   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1822   *iscopy = PETSC_FALSE;
1823   PetscFunctionReturn(0);
1824 }
1825 EXTERN_C_END
1826 
1827 #undef __FUNC__
1828 #define __FUNC__ "MatCreateMPIBAIJ"
1829 /*@C
1830    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1831    (block compressed row).  For good matrix assembly performance
1832    the user should preallocate the matrix storage by setting the parameters
1833    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1834    performance can be increased by more than a factor of 50.
1835 
1836    Collective on MPI_Comm
1837 
1838    Input Parameters:
1839 +  comm - MPI communicator
1840 .  bs   - size of block
1841 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1842            This value should be the same as the local size used in creating the
1843            y vector for the matrix-vector product y = Ax.
1844 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1845            This value should be the same as the local size used in creating the
1846            x vector for the matrix-vector product y = Ax.
1847 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1848 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1849 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1850            submatrix  (same for all local rows)
1851 .  d_nnz - array containing the number of block nonzeros in the various block rows
1852            in the diagonal portion of the local submatrix (possibly different for each
1853            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1854 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1855            submatrix (same for all local rows).
1856 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1857            off-diagonal portion of the local submatrix (possibly different for
1858            each block row) or PETSC_NULL.
1859 
1860    Output Parameter:
1861 .  A - the matrix
1862 
1863    Options Database Keys:
1864 +   -mat_no_unroll - uses code that does not unroll the loops in the
1865                      block calculations (much slower)
1866 .   -mat_block_size - size of the blocks to use
1867 -   -mat_mpi - use the parallel matrix data structures even on one processor
1868                (defaults to using SeqBAIJ format on one processor)
1869 
1870    Notes:
1871    The user MUST specify either the local or global matrix dimensions
1872    (possibly both).
1873 
1874    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1875    then it must be used on all processors that share the object for that argument.
1876 
1877    Storage Information:
1878    For a square global matrix we define each processor's diagonal portion
1879    to be its local rows and the corresponding columns (a square submatrix);
1880    each processor's off-diagonal portion encompasses the remainder of the
1881    local matrix (a rectangular submatrix).
1882 
1883    The user can specify preallocated storage for the diagonal part of
1884    the local submatrix with either d_nz or d_nnz (not both).  Set
1885    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1886    memory allocation.  Likewise, specify preallocated storage for the
1887    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1888 
1889    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1890    the figure below we depict these three local rows and all columns (0-11).
1891 
1892 .vb
1893            0 1 2 3 4 5 6 7 8 9 10 11
1894           -------------------
1895    row 3  |  o o o d d d o o o o o o
1896    row 4  |  o o o d d d o o o o o o
1897    row 5  |  o o o d d d o o o o o o
1898           -------------------
1899 .ve
1900 
1901    Thus, any entries in the d locations are stored in the d (diagonal)
1902    submatrix, and any entries in the o locations are stored in the
1903    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1904    stored simply in the MATSEQBAIJ format for compressed row storage.
1905 
1906    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1907    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1908    In general, for PDE problems in which most nonzeros are near the diagonal,
1909    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1910    or you will get TERRIBLE performance; see the users' manual chapter on
1911    matrices.
1912 
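   A rough usage sketch is shown below; the block size, local dimensions, and
   preallocation counts are illustrative placeholders, not recommended values.

.vb
   Mat A;
   int ierr, bs = 2, m = 10, n = 10;   /* local sizes; must be divisible by bs */

   /* roughly 5 block nonzeros per block row in the diagonal portion and 2 in
      the off-diagonal portion of each local submatrix */
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                           5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
.ve
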
1913    Level: intermediate
1914 
1915 .keywords: matrix, block, aij, compressed row, sparse, parallel
1916 
1917 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1918 @*/
1919 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1920                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1921 {
1922   Mat          B;
1923   Mat_MPIBAIJ  *b;
1924   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1925   int          flag1 = 0,flag2 = 0;
1926 
1927   PetscFunctionBegin;
1928   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1929 
1930   MPI_Comm_size(comm,&size);
1931   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
1932   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1933   if (!flag1 && !flag2 && size == 1) {
1934     if (M == PETSC_DECIDE) M = m;
1935     if (N == PETSC_DECIDE) N = n;
1936     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1937     PetscFunctionReturn(0);
1938   }
1939 
1940   *A = 0;
1941   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
1942   PLogObjectCreate(B);
1943   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
1944   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1945   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1946 
1947   B->ops->destroy    = MatDestroy_MPIBAIJ;
1948   B->ops->view       = MatView_MPIBAIJ;
1949   B->mapping    = 0;
1950   B->factor     = 0;
1951   B->assembled  = PETSC_FALSE;
1952 
1953   B->insertmode = NOT_SET_VALUES;
1954   MPI_Comm_rank(comm,&b->rank);
1955   MPI_Comm_size(comm,&b->size);
1956 
1957   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1958     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1959   }
1960   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1961     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1962   }
1963   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1964     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1965   }
1966   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1967   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
1968 
1969   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1970     work[0] = m; work[1] = n;
1971     mbs = m/bs; nbs = n/bs;
1972     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1973     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
1974     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
1975   }
1976   if (m == PETSC_DECIDE) {
1977     Mbs = M/bs;
1978     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
1979     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
1980     m   = mbs*bs;
1981   }
1982   if (n == PETSC_DECIDE) {
1983     Nbs = N/bs;
1984     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
1985     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
1986     n   = nbs*bs;
1987   }
1988   if (mbs*bs != m || nbs*bs != n) {
1989     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1990   }
1991 
1992   b->m = m; B->m = m;
1993   b->n = n; B->n = n;
1994   b->N = N; B->N = N;
1995   b->M = M; B->M = M;
1996   b->bs  = bs;
1997   b->bs2 = bs*bs;
1998   b->mbs = mbs;
1999   b->nbs = nbs;
2000   b->Mbs = Mbs;
2001   b->Nbs = Nbs;
2002 
2003   /* the information in the maps duplicates the information computed below, eventually
2004      we should remove the duplicate information that is not contained in the maps */
2005   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2006   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2007 
2008   /* build local table of row and column ownerships */
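  /*
     Illustrative example (made-up sizes): with 3 processes owning mbs = 2, 3 and 2
     block rows respectively, the Allgather plus prefix sum below yield
     rowners = {0, 2, 5, 7}; process r owns block rows rowners[r] up to (but not
     including) rowners[r+1], and rowners_bs holds the same ranges scaled by bs.
  */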
2009   b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
2010   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2011   b->cowners    = b->rowners + b->size + 2;
2012   b->rowners_bs = b->cowners + b->size + 2;
2013   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2014   b->rowners[0]    = 0;
2015   for ( i=2; i<=b->size; i++ ) {
2016     b->rowners[i] += b->rowners[i-1];
2017   }
2018   for ( i=0; i<=b->size; i++ ) {
2019     b->rowners_bs[i] = b->rowners[i]*bs;
2020   }
2021   b->rstart    = b->rowners[b->rank];
2022   b->rend      = b->rowners[b->rank+1];
2023   b->rstart_bs = b->rstart * bs;
2024   b->rend_bs   = b->rend * bs;
2025 
2026   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2027   b->cowners[0] = 0;
2028   for ( i=2; i<=b->size; i++ ) {
2029     b->cowners[i] += b->cowners[i-1];
2030   }
2031   b->cstart    = b->cowners[b->rank];
2032   b->cend      = b->cowners[b->rank+1];
2033   b->cstart_bs = b->cstart * bs;
2034   b->cend_bs   = b->cend * bs;
2035 
2036 
2037   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2038   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
2039   PLogObjectParent(B,b->A);
2040   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2041   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
2042   PLogObjectParent(B,b->B);
2043 
2044   /* build cache for off-processor entries formed during assembly */
2045   ierr = StashCreate_Private(comm,1,&b->stash); CHKERRQ(ierr);
2046   ierr = StashCreate_Private(comm,bs,&b->bstash); CHKERRQ(ierr);
2047   b->donotstash  = 0;
2048   b->colmap      = 0;
2049   b->garray      = 0;
2050   b->roworiented = 1;
2051 
2052   /* stuff used in block assembly */
2053   b->barray       = 0;
2054 
2055   /* stuff used for matrix vector multiply */
2056   b->lvec         = 0;
2057   b->Mvctx        = 0;
2058 
2059   /* stuff for MatGetRow() */
2060   b->rowindices   = 0;
2061   b->rowvalues    = 0;
2062   b->getrowactive = PETSC_FALSE;
2063 
2064   /* hash table stuff */
2065   b->ht           = 0;
2066   b->hd           = 0;
2067   b->ht_size      = 0;
2068   b->ht_flag      = 0;
2069   b->ht_fact      = 0;
2070   b->ht_total_ct  = 0;
2071   b->ht_insert_ct = 0;
2072 
2073   *A = B;
2074   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2075   if (flg) {
2076     double fact = 1.39;
2077     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2078     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2079     if (fact <= 1.0) fact = 1.39;
2080     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2081     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2082   }
2083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
2084                                      "MatGetDiagonalBlock_MPIBAIJ",
2085                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #undef __FUNC__
2090 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2091 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2092 {
2093   Mat         mat;
2094   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2095   int         ierr, len=0, flg;
2096 
2097   PetscFunctionBegin;
2098   *newmat       = 0;
2099   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2100   PLogObjectCreate(mat);
2101   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2102   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2103   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2104   mat->ops->view       = MatView_MPIBAIJ;
2105   mat->factor          = matin->factor;
2106   mat->assembled       = PETSC_TRUE;
2107   mat->insertmode      = NOT_SET_VALUES;
2108 
2109   a->m = mat->m   = oldmat->m;
2110   a->n = mat->n   = oldmat->n;
2111   a->M = mat->M   = oldmat->M;
2112   a->N = mat->N   = oldmat->N;
2113 
2114   a->bs  = oldmat->bs;
2115   a->bs2 = oldmat->bs2;
2116   a->mbs = oldmat->mbs;
2117   a->nbs = oldmat->nbs;
2118   a->Mbs = oldmat->Mbs;
2119   a->Nbs = oldmat->Nbs;
2120 
2121   a->rstart       = oldmat->rstart;
2122   a->rend         = oldmat->rend;
2123   a->cstart       = oldmat->cstart;
2124   a->cend         = oldmat->cend;
2125   a->size         = oldmat->size;
2126   a->rank         = oldmat->rank;
2127   a->donotstash   = oldmat->donotstash;
2128   a->roworiented  = oldmat->roworiented;
2129   a->rowindices   = 0;
2130   a->rowvalues    = 0;
2131   a->getrowactive = PETSC_FALSE;
2132   a->barray       = 0;
2133   a->rstart_bs    = oldmat->rstart_bs;
2134   a->rend_bs      = oldmat->rend_bs;
2135   a->cstart_bs    = oldmat->cstart_bs;
2136   a->cend_bs      = oldmat->cend_bs;
2137 
2138   /* hash table stuff */
2139   a->ht           = 0;
2140   a->hd           = 0;
2141   a->ht_size      = 0;
2142   a->ht_flag      = oldmat->ht_flag;
2143   a->ht_fact      = oldmat->ht_fact;
2144   a->ht_total_ct  = 0;
2145   a->ht_insert_ct = 0;
2146 
2147 
2148   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2149   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2150   a->cowners    = a->rowners + a->size + 2;
2151   a->rowners_bs = a->cowners + a->size + 2;
2152   PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
2153   ierr = StashCreate_Private(matin->comm,1,&a->stash); CHKERRQ(ierr);
2154   ierr = StashCreate_Private(matin->comm,oldmat->bs,&a->bstash); CHKERRQ(ierr);
2155   if (oldmat->colmap) {
2156 #if defined (USE_CTABLE)
2157   ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
2158 #else
2159     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2160     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2161     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2162 #endif
2163   } else a->colmap = 0;
2164   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2165     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2166     PLogObjectMemory(mat,len*sizeof(int));
2167     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2168   } else a->garray = 0;
2169 
2170   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2171   PLogObjectParent(mat,a->lvec);
2172   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2173 
2174   PLogObjectParent(mat,a->Mvctx);
2175   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2176   PLogObjectParent(mat,a->A);
2177   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2178   PLogObjectParent(mat,a->B);
2179   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2180   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2181   if (flg) {
2182     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2183   }
2184   *newmat = mat;
2185   PetscFunctionReturn(0);
2186 }
2187 
2188 #include "sys.h"
2189 
2190 #undef __FUNC__
2191 #define __FUNC__ "MatLoad_MPIBAIJ"
2192 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2193 {
2194   Mat          A;
2195   int          i, nz, ierr, j,rstart, rend, fd;
2196   Scalar       *vals,*buf;
2197   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2198   MPI_Status   status;
2199   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2200   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2201   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2202   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2203   int          dcount,kmax,k,nzcount,tmp;
2204 
2205   PetscFunctionBegin;
2206   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2207 
2208   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2209   if (!rank) {
2210     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2211     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2212     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2213     if (header[3] < 0) {
2214       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2215     }
2216   }
2217 
2218   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2219   M = header[1]; N = header[2];
2220 
2221   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2222 
2223   /*
2224      This code adds extra rows to make sure the number of rows is
2225      divisible by the blocksize
2226   */
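  /*
     Worked example (illustrative numbers): with M = 10 and bs = 3, Mbs = 10/3 = 3
     and extra_rows = 3 - 10 + 3*3 = 2, so Mbs is bumped to 4 and two padding rows
     (each later given a single unit entry on the diagonal) are appended, making
     the padded global size 12 = 4*3.
  */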
2227   Mbs        = M/bs;
2228   extra_rows = bs - M + bs*(Mbs);
2229   if (extra_rows == bs) extra_rows = 0;
2230   else                  Mbs++;
2231   if (extra_rows && !rank) {
2232     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2233   }
2234 
2235   /* determine ownership of all rows */
2236   mbs = Mbs/size + ((Mbs % size) > rank);
2237   m   = mbs * bs;
2238   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2239   browners = rowners + size + 1;
2240   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2241   rowners[0] = 0;
2242   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2243   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2244   rstart = rowners[rank];
2245   rend   = rowners[rank+1];
2246 
2247   /* distribute row lengths to all processors */
2248   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
2249   if (!rank) {
2250     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2251     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2252     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2253     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2254     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2255     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2256     PetscFree(sndcounts);
2257   } else {
2258     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2259   }
2260 
2261   if (!rank) {
2262     /* calculate the number of nonzeros on each processor */
2263     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2264     PetscMemzero(procsnz,size*sizeof(int));
2265     for ( i=0; i<size; i++ ) {
2266       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2267         procsnz[i] += rowlengths[j];
2268       }
2269     }
2270     PetscFree(rowlengths);
2271 
2272     /* determine max buffer needed and allocate it */
2273     maxnz = 0;
2274     for ( i=0; i<size; i++ ) {
2275       maxnz = PetscMax(maxnz,procsnz[i]);
2276     }
2277     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2278 
2279     /* read in my part of the matrix column indices  */
2280     nz = procsnz[0];
2281     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2282     mycols = ibuf;
2283     if (size == 1)  nz -= extra_rows;
2284     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2285     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2286 
2287     /* read in each remaining processor's column indices (except the last one's) and ship them off */
2288     for ( i=1; i<size-1; i++ ) {
2289       nz   = procsnz[i];
2290       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2291       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2292     }
2293     /* read in the stuff for the last proc */
2294     if ( size != 1 ) {
2295       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2296       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2297       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2298       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2299     }
2300     PetscFree(cols);
2301   } else {
2302     /* determine buffer space needed for message */
2303     nz = 0;
2304     for ( i=0; i<m; i++ ) {
2305       nz += locrowlens[i];
2306     }
2307     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2308     mycols = ibuf;
2309     /* receive message of column indices*/
2310     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2311     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2312     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2313   }
2314 
2315   /* loop over local rows, determining number of off diagonal entries */
2316   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2317   odlens = dlens + (rend-rstart);
2318   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2319   PetscMemzero(mask,3*Mbs*sizeof(int));
2320   masked1 = mask    + Mbs;
2321   masked2 = masked1 + Mbs;
2322   rowcount = 0; nzcount = 0;
2323   for ( i=0; i<mbs; i++ ) {
2324     dcount  = 0;
2325     odcount = 0;
2326     for ( j=0; j<bs; j++ ) {
2327       kmax = locrowlens[rowcount];
2328       for ( k=0; k<kmax; k++ ) {
2329         tmp = mycols[nzcount++]/bs;
2330         if (!mask[tmp]) {
2331           mask[tmp] = 1;
2332           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2333           else masked1[dcount++] = tmp;
2334         }
2335       }
2336       rowcount++;
2337     }
2338 
2339     dlens[i]  = dcount;
2340     odlens[i] = odcount;
2341 
2342     /* zero out the mask elements we set */
2343     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2344     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2345   }
2346 
2347   /* create our matrix */
2348   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2349          CHKERRQ(ierr);
2350   A = *newmat;
2351   MatSetOption(A,MAT_COLUMNS_SORTED);
2352 
2353   if (!rank) {
2354     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
2355     /* read in my part of the matrix numerical values  */
2356     nz = procsnz[0];
2357     vals = buf;
2358     mycols = ibuf;
2359     if (size == 1)  nz -= extra_rows;
2360     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2361     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2362 
2363     /* insert into matrix */
2364     jj      = rstart*bs;
2365     for ( i=0; i<m; i++ ) {
2366       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2367       mycols += locrowlens[i];
2368       vals   += locrowlens[i];
2369       jj++;
2370     }
2371     /* read in other processors (except the last one) and ship out */
2372     for ( i=1; i<size-1; i++ ) {
2373       nz   = procsnz[i];
2374       vals = buf;
2375       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2376       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2377     }
2378     /* the last proc */
2379     if ( size != 1 ){
2380       nz   = procsnz[i] - extra_rows;
2381       vals = buf;
2382       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2383       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2384       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2385     }
2386     PetscFree(procsnz);
2387   } else {
2388     /* receive numeric values */
2389     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
2390 
2391     /* receive message of values*/
2392     vals   = buf;
2393     mycols = ibuf;
2394     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2395     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2396     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2397 
2398     /* insert into matrix */
2399     jj      = rstart*bs;
2400     for ( i=0; i<m; i++ ) {
2401       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2402       mycols += locrowlens[i];
2403       vals   += locrowlens[i];
2404       jj++;
2405     }
2406   }
2407   PetscFree(locrowlens);
2408   PetscFree(buf);
2409   PetscFree(ibuf);
2410   PetscFree(rowners);
2411   PetscFree(dlens);
2412   PetscFree(mask);
2413   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2414   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 
2419 
2420 #undef __FUNC__
2421 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2422 /*@
2423    MatMPIBAIJSetHashTableFactor - Sets the factor that determines the size of the hash table used during matrix assembly.
2424 
2425    Input Parameters:
2426 +  mat  - the matrix
2427 -  fact - factor
2428 
2429    Collective on Mat
2430 
2431    Level: advanced
2432 
2433   Notes:
2434    This can also be set by the command line option: -mat_use_hash_table fact
2435 
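   Example usage (illustrative; A is assumed to be an existing MATMPIBAIJ matrix
   and 1.39 is just a sample value):
.vb
   ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve
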
2436 .keywords: matrix, hashtable, factor, HT
2437 
2438 .seealso: MatSetOption()
2439 @*/
2440 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2441 {
2442   Mat_MPIBAIJ *baij;
2443 
2444   PetscFunctionBegin;
2445   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2446   if (mat->type != MATMPIBAIJ) {
2447       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2448   }
2449   baij = (Mat_MPIBAIJ*) mat->data;
2450   baij->ht_fact = fact;
2451   PetscFunctionReturn(0);
2452 }
2453