xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 74cdf0df7c4b0e39a3e5b5b72d8b6c3b3328fa68)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.119 1998/04/27 03:52:09 curfman Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 
15 /*
16      Local utility routine that creates a mapping from the global column
17    number to the local number in the off-diagonal part of the local
18    storage of the matrix.  This is done in a non scable way since the
19    length of colmap equals the global matrix length.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
23 static int CreateColmap_MPIBAIJ_Private(Mat mat)
24 {
25   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
26   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27   int         nbs = B->nbs,i,bs=B->bs;;
28 
29   PetscFunctionBegin;
30   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
31   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
32   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
34   PetscFunctionReturn(0);
35 }
36 
/* Number of extra block entries added to a row's storage when it must grow */
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private - Macro that inserts or adds the single
   scalar `value` at local point indices (row,col) into the diagonal part
   a = (Mat_SeqBAIJ*)A->data of the parallel matrix.

   The caller (MatSetValues_MPIBAIJ) must declare and initialize:
     a, A, aa, ai, aj, aimax, ailen, bs, bs2,
     rp, ap, bap, rmax, nrow, brow, bcol, ridx, cidx,
     low, high, t, _i, ii, N, len (locals used freely here).

   Logic: locate the block row (row/bs) and search its sorted block-column
   list (binary narrowing followed by a linear scan) for col/bs.  If the
   block exists, add/overwrite the (ridx,cidx) entry of the bs x bs block
   (column-major within the block).  Otherwise a new block is inserted,
   enlarging the row's storage by CHUNKSIZE block entries when full, which
   reallocates the whole matrix into a single buffer and shifts the later
   rows' offsets.  a->nonew: 1 = silently drop new nonzeros, -1/-2 = error.

   NOTE(review): a->nz is incremented only inside the reallocation branch;
   an insertion that fits in existing free space does not bump a->nz —
   verify against how a->nz is consumed elsewhere.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
113 
/*
   MatSetValues_SeqBAIJ_B_Private - Identical in structure to
   MatSetValues_SeqBAIJ_A_Private above, but operates on the off-diagonal
   part b = (Mat_SeqBAIJ*)B->data (variables bj, bi, ba, bimax, bilen,
   goto label b_noinsert).  See the A variant for the full description of
   the search/insert/reallocate logic and required caller-declared locals.

   NOTE(review): as in the A variant, b->nz is incremented only in the
   reallocation branch — verify.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
188 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts or adds an m x n logically dense array of
   values v into the matrix at global point (not block) indices im[]/in[].

   Entries in locally owned rows go straight into the diagonal (A) or
   off-diagonal (B) sequential BAIJ part via the macros above; entries for
   rows owned by other processes are stashed for communication during
   assembly (unless donotstash is set).  roworiented selects whether v is
   read row-major (v[i*n+j]) or column-major (v[i+j*m]).
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is locally owned */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column in the diagonal block: insert into A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column in the off-diagonal block: translate to B's local numbering */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap stores i*bs+1, so subtract 1; negative col means block absent */
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-processor coupling: fall back to unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row belongs to another process: stash for assembly-time communication */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
	  for ( j=0; j<n; j++ ) {
	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
270 
271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272 #undef __FUNC__
273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275 {
276   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
277   Scalar      *value,*barray=baij->barray;
278   int         ierr,i,j,ii,jj,row,col,k,l;
279   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280   int         rend=baij->rend,cstart=baij->cstart,stepval;
281   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282 
283   if(!barray) {
284     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
285   }
286 
287   if (roworiented) {
288     stepval = (n-1)*bs;
289   } else {
290     stepval = (m-1)*bs;
291   }
292   for ( i=0; i<m; i++ ) {
293 #if defined(USE_PETSC_BOPT_g)
294     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296 #endif
297     if (im[i] >= rstart && im[i] < rend) {
298       row = im[i] - rstart;
299       for ( j=0; j<n; j++ ) {
300         /* If NumCol = 1 then a copy is not required */
301         if ((roworiented) && (n == 1)) {
302           barray = v + i*bs2;
303         } else if((!roworiented) && (m == 1)) {
304           barray = v + j*bs2;
305         } else { /* Here a copy is required */
306           if (roworiented) {
307             value = v + i*(stepval+bs)*bs + j*bs;
308           } else {
309             value = v + j*(stepval+bs)*bs + i*bs;
310           }
311           for ( ii=0; ii<bs; ii++,value+=stepval ) {
312             for (jj=0; jj<bs; jj++ ) {
313               *barray++  = *value++;
314             }
315           }
316           barray -=bs2;
317         }
318 
319         if (in[j] >= cstart && in[j] < cend){
320           col  = in[j] - cstart;
321           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322         }
323 #if defined(USE_PETSC_BOPT_g)
324         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326 #endif
327         else {
328           if (mat->was_assembled) {
329             if (!baij->colmap) {
330               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331             }
332 
333 #if defined(USE_PETSC_BOPT_g)
334             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335 #endif
336             col = (baij->colmap[in[j]] - 1)/bs;
337             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339               col =  in[j];
340             }
341           }
342           else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (!baij->donotstash) {
348         if (roworiented ) {
349           row   = im[i]*bs;
350           value = v + i*(stepval+bs)*bs;
351           for ( j=0; j<bs; j++,row++ ) {
352             for ( k=0; k<n; k++ ) {
353               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355               }
356             }
357           }
358         } else {
359           for ( j=0; j<n; j++ ) {
360             value = v + j*(stepval+bs)*bs + i*bs;
361             col   = in[j]*bs;
362             for ( k=0; k<bs; k++,col++,value+=stepval) {
363               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365               }
366             }
367           }
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
#include <math.h>
/* Multiplicative-hashing constant (golden ratio conjugate) */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* Multiplicative hash of `key` into [0,size); `tmp` is caller scratch (double) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ
   (global point indices).  The nonzero structure must already be recorded
   in baij->ht/baij->hd (built by MatCreateHashTable_MPIBAIJ_Private);
   inserting a (row,col) with no table entry is an error.  Lookup probes
   linearly from the hashed slot, wrapping once around the table.
   Off-processor rows are stashed as in the non-HT version.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  /* probe-count statistics, kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        /* key encodes the (block row, block col) pair; +1 keeps 0 as "empty" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-processor row: stash for assembly-time communication */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
	  for ( j=0; j<n; j++ ) {
	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
458 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of
   MatSetValuesBlocked_MPIBAIJ (global BLOCK indices).  Requires the
   nonzero structure already present in baij->ht/baij->hd; each lookup
   yields the address of the bs x bs block, into which v is scattered
   column-major.  Off-processor block rows are stashed entry-by-entry.

   NOTE(review): the roworiented fast path advances v_t by bs per column
   and reads blocks via v_t/stepval — assumes the same v layout as the
   non-HT blocked routine; verify against callers.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  /* probe-count statistics, kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride (in Scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        /* key encodes the (block row, block col) pair; +1 keeps 0 as "empty" */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* baij_a points at the stored bs x bs block (column-major) */
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            /* row ii of the input block scatters to stride-bs positions */
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          /* column-oriented input: columns are already contiguous */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned elsewhere: stash the individual point entries */
      if (!baij->donotstash) {
        if (roworiented ) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
588 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves an m x n array of values at global
   point indices idxm[]/idxn[] into v (row-major).  Only rows owned by
   this process are supported; columns are fetched from the diagonal (A)
   or off-diagonal (B) part as appropriate.  A requested entry whose
   block is not present locally is returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap entry 0 (-> -1 here) or a garray mismatch means the
             block is not stored locally: report 0.0 */
          if((baij->colmap[idxn[j]/bs]-1 < 0) ||
             (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
            ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatNorm_MPIBAIJ"
630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
631 {
632   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
633   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
634   int        ierr, i,bs2=baij->bs2;
635   double     sum = 0.0;
636   Scalar     *v;
637 
638   PetscFunctionBegin;
639   if (baij->size == 1) {
640     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
641   } else {
642     if (type == NORM_FROBENIUS) {
643       v = amat->a;
644       for (i=0; i<amat->nz*bs2; i++ ) {
645 #if defined(USE_PETSC_COMPLEX)
646         sum += real(conj(*v)*(*v)); v++;
647 #else
648         sum += (*v)*(*v); v++;
649 #endif
650       }
651       v = bmat->a;
652       for (i=0; i<bmat->nz*bs2; i++ ) {
653 #if defined(USE_PETSC_COMPLEX)
654         sum += real(conj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
660       *norm = sqrt(*norm);
661     } else {
662       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
663     }
664   }
665   PetscFunctionReturn(0);
666 }
667 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Starts communicating the stashed
   off-processor values.  Each stashed (row,col,value) triple is packed as
   three Scalars (the indices are stored as Scalars to keep a single
   message datatype) and sent to its owning process with nonblocking
   sends; matching nonblocking receives are posted first.  The stash is
   freed here; the pending requests are recorded in baij for
   MatAssemblyEnd to complete.
*/
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
  int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #stashed entries for process j; procs[j] = 1 if any */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    /* rowners holds block-row ownership ranges, hence the *bs scaling */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax      = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack triples (row, col, value); starts[] is consumed as a cursor here */
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[] (it was destroyed by the cursor use above) */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  /* record pending communication for MatAssemblyEnd */
  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}
764 
765 /*
766   Creates the hash table, and sets the table
767   This table is created only once.
768   If new entried need to be added to the matrix
769   then the hash table has to be destroyed and
770   recreated.
771 */
772 #undef __FUNC__
773 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
774 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
775 {
776   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
777   Mat         A = baij->A, B=baij->B;
778   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
779   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
780   int         size,bs2=baij->bs2,rstart=baij->rstart;
781   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
782   int         *HT,key;
783   Scalar      **HD;
784   double      tmp;
785 #if defined(USE_PETSC_BOPT_g)
786   int         ct=0,max=0;
787 #endif
788 
789   PetscFunctionBegin;
790   baij->ht_size=(int)(factor*nz);
791   size = baij->ht_size;
792 
793   if (baij->ht) {
794     PetscFunctionReturn(0);
795   }
796 
797   /* Allocate Memory for Hash Table */
798   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
799   baij->ht = (int*)(baij->hd + size);
800   HD = baij->hd;
801   HT = baij->ht;
802 
803 
804   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
805 
806 
807   /* Loop Over A */
808   for ( i=0; i<a->mbs; i++ ) {
809     for ( j=ai[i]; j<ai[i+1]; j++ ) {
810       row = i+rstart;
811       col = aj[j]+cstart;
812 
813       key = row*Nbs + col + 1;
814       h1  = HASH(size,key,tmp);
815       for ( k=0; k<size; k++ ){
816         if (HT[(h1+k)%size] == 0.0) {
817           HT[(h1+k)%size] = key;
818           HD[(h1+k)%size] = a->a + j*bs2;
819           break;
820 #if defined(USE_PETSC_BOPT_g)
821         } else {
822           ct++;
823 #endif
824         }
825       }
826 #if defined(USE_PETSC_BOPT_g)
827       if (k> max) max = k;
828 #endif
829     }
830   }
831   /* Loop Over B */
832   for ( i=0; i<b->mbs; i++ ) {
833     for ( j=bi[i]; j<bi[i+1]; j++ ) {
834       row = i+rstart;
835       col = garray[bj[j]];
836       key = row*Nbs + col + 1;
837       h1  = HASH(size,key,tmp);
838       for ( k=0; k<size; k++ ){
839         if (HT[(h1+k)%size] == 0.0) {
840           HT[(h1+k)%size] = key;
841           HD[(h1+k)%size] = b->a + j*bs2;
842           break;
843 #if defined(USE_PETSC_BOPT_g)
844         } else {
845           ct++;
846 #endif
847         }
848       }
849 #if defined(USE_PETSC_BOPT_g)
850       if (k> max) max = k;
851 #endif
852     }
853   }
854 
855   /* Print Summary */
856 #if defined(USE_PETSC_BOPT_g)
857   for ( i=0,j=0; i<size; i++)
858     if (HT[i]) {j++;}
859   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
860            (j== 0)? 0.0:((double)(ct+j))/j,max);
861 #endif
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNC__
866 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes matrix assembly: drains the (row,col,value) triples posted by
   MatAssemblyBegin from other processes, inserts them into the local
   diagonal (A) or off-diagonal (B) block, then assembles both blocks.
   Communication buffers created in MatAssemblyBegin are freed here.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
  int         bs=baij->bs,row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    /* each entry is a (row,col,value) triple of Scalars */
    n = n/3;
    for ( i=0; i<n; i++ ) {
      /* row/col were encoded as Scalars by the sender; recover the ints */
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        /* column lies in this process's diagonal block */
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      } else {
        if (mat->was_assembled) {
          /* after a prior assembly B uses compressed column numbering;
             colmap translates global block column -> local (+1, 0 = absent) */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = (baij->colmap[col/bs]) - 1 + col%bs;
          /* a new nonzero location: fall back to global numbering by
             disassembling, unless new nonzeros are disallowed */
          if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      }
    }
    count--;
  }
  PetscFree(baij->recv_waits); PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(baij->send_waits); PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
  }

  /* first final assembly: build the scatter used by MatMult etc. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, build the hash table and switch to the fast
     hash-based MatSetValues routines for subsequent insertions */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow work space is invalidated by a new assembly */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
950 
951 #undef __FUNC__
952 #define __FUNC__ "MatView_MPIBAIJ_Binary"
953 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
954 {
955   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
956   int          ierr;
957 
958   PetscFunctionBegin;
959   if (baij->size == 1) {
960     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
961   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNC__
966 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
967 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
968 {
969   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
970   int          ierr, format,rank,bs = baij->bs;
971   FILE         *fd;
972   ViewerType   vtype;
973 
974   PetscFunctionBegin;
975   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
976   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
977     ierr = ViewerGetFormat(viewer,&format);
978     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
979       MatInfo info;
980       MPI_Comm_rank(mat->comm,&rank);
981       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
982       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
983       PetscSequentialPhaseBegin(mat->comm,1);
984       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
985               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
986               baij->bs,(int)info.memory);
987       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
988       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
989       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
990       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
991       fflush(fd);
992       PetscSequentialPhaseEnd(mat->comm,1);
993       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
994       PetscFunctionReturn(0);
995     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
996       PetscPrintf(mat->comm,"  block size is %d\n",bs);
997       PetscFunctionReturn(0);
998     }
999   }
1000 
1001   if (vtype == DRAW_VIEWER) {
1002     Draw       draw;
1003     PetscTruth isnull;
1004     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
1005     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1006   }
1007 
1008   if (vtype == ASCII_FILE_VIEWER) {
1009     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1010     PetscSequentialPhaseBegin(mat->comm,1);
1011     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
1012            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
1013             baij->cstart*bs,baij->cend*bs);
1014     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1015     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
1016     fflush(fd);
1017     PetscSequentialPhaseEnd(mat->comm,1);
1018   } else {
1019     int size = baij->size;
1020     rank = baij->rank;
1021     if (size == 1) {
1022       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1023     } else {
1024       /* assemble the entire matrix onto first processor. */
1025       Mat         A;
1026       Mat_SeqBAIJ *Aloc;
1027       int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
1028       int         mbs=baij->mbs;
1029       Scalar      *a;
1030 
1031       if (!rank) {
1032         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1033       } else {
1034         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1035       }
1036       PLogObjectParent(mat,A);
1037 
1038       /* copy over the A part */
1039       Aloc = (Mat_SeqBAIJ*) baij->A->data;
1040       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1041       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1042 
1043       for ( i=0; i<mbs; i++ ) {
1044         rvals[0] = bs*(baij->rstart + i);
1045         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1046         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1047           col = (baij->cstart+aj[j])*bs;
1048           for (k=0; k<bs; k++ ) {
1049             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1050             col++; a += bs;
1051           }
1052         }
1053       }
1054       /* copy over the B part */
1055       Aloc = (Mat_SeqBAIJ*) baij->B->data;
1056       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1057       for ( i=0; i<mbs; i++ ) {
1058         rvals[0] = bs*(baij->rstart + i);
1059         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1060         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1061           col = baij->garray[aj[j]]*bs;
1062           for (k=0; k<bs; k++ ) {
1063             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1064             col++; a += bs;
1065           }
1066         }
1067       }
1068       PetscFree(rvals);
1069       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1070       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1071       /*
1072          Everyone has to call to draw the matrix since the graphics waits are
1073          synchronized across all processors that share the Draw object
1074       */
1075       if (!rank || vtype == DRAW_VIEWER) {
1076         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1077       }
1078       ierr = MatDestroy(A); CHKERRQ(ierr);
1079     }
1080   }
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 
1085 
1086 #undef __FUNC__
1087 #define __FUNC__ "MatView_MPIBAIJ"
1088 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1089 {
1090   int         ierr;
1091   ViewerType  vtype;
1092 
1093   PetscFunctionBegin;
1094   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1095   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1096       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1097     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1098   } else if (vtype == BINARY_FILE_VIEWER) {
1099     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1100   } else {
1101     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1102   }
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNC__
1107 #define __FUNC__ "MatDestroy_MPIBAIJ"
1108 int MatDestroy_MPIBAIJ(Mat mat)
1109 {
1110   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1111   int         ierr;
1112 
1113   PetscFunctionBegin;
1114 #if defined(USE_PETSC_LOG)
1115   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1116 #endif
1117 
1118   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1119   PetscFree(baij->rowners);
1120   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1121   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1122   if (baij->colmap) PetscFree(baij->colmap);
1123   if (baij->garray) PetscFree(baij->garray);
1124   if (baij->lvec)   VecDestroy(baij->lvec);
1125   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1126   if (baij->rowvalues) PetscFree(baij->rowvalues);
1127   if (baij->barray) PetscFree(baij->barray);
1128   if (baij->hd) PetscFree(baij->hd);
1129   PetscFree(baij);
1130   PLogObjectDestroy(mat);
1131   PetscHeaderDestroy(mat);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNC__
1136 #define __FUNC__ "MatMult_MPIBAIJ"
1137 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1138 {
1139   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1140   int         ierr, nt;
1141 
1142   PetscFunctionBegin;
1143   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1144   if (nt != a->n) {
1145     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1146   }
1147   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1148   if (nt != a->m) {
1149     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1150   }
1151   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1152   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1153   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1154   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1155   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNC__
1160 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1161 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1162 {
1163   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1164   int        ierr;
1165 
1166   PetscFunctionBegin;
1167   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1168   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1169   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1170   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNC__
1175 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1176 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1177 {
1178   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1179   int         ierr;
1180 
1181   PetscFunctionBegin;
1182   /* do nondiagonal part */
1183   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1184   /* send it on its way */
1185   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1186   /* do local part */
1187   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1188   /* receive remote parts: note this assumes the values are not actually */
1189   /* inserted in yy until the next line, which is true for my implementation*/
1190   /* but is not perhaps always true. */
1191   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNC__
1196 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   zz = yy + A^T * xx.  The off-diagonal transpose product is scattered
   (reverse, adding) into zz while the local transpose multiply-add runs.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1215 
1216 /*
1217   This only works correctly for square matrices where the subblock A->A is the
1218    diagonal block
1219 */
1220 #undef __FUNC__
1221 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1222 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1223 {
1224   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1225   int         ierr;
1226 
1227   PetscFunctionBegin;
1228   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1229   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNC__
1234 #define __FUNC__ "MatScale_MPIBAIJ"
1235 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1236 {
1237   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1238   int         ierr;
1239 
1240   PetscFunctionBegin;
1241   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1242   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNC__
1247 #define __FUNC__ "MatGetSize_MPIBAIJ"
1248 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1249 {
1250   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1251 
1252   PetscFunctionBegin;
1253   if (m) *m = mat->M;
1254   if (n) *n = mat->N;
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNC__
1259 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1260 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1261 {
1262   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1263 
1264   PetscFunctionBegin;
1265   *m = mat->m; *n = mat->n;
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNC__
1270 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1271 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1272 {
1273   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1274 
1275   PetscFunctionBegin;
1276   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1281 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1282 
1283 #undef __FUNC__
1284 #define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns the global column indices and values of global row 'row',
   merging the diagonal (A) and off-diagonal (B) contributions in
   increasing column order.  Only locally owned rows may be requested.
   Work space is allocated lazily into mat->rowvalues/rowindices and is
   released via MatRestoreRow_MPIBAIJ / MatDestroy.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be outstanding at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* number of nonzero blocks in block-row i, A part plus B part */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single buffer: values first, then the index array directly after */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* pass 0 to the sequential getrow for pieces the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap translates B's compressed block columns to global block columns */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes cstart come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while copying values above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so locate the split point here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1364 
1365 #undef __FUNC__
1366 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1367 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1368 {
1369   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1370 
1371   PetscFunctionBegin;
1372   if (baij->getrowactive == PETSC_FALSE) {
1373     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1374   }
1375   baij->getrowactive = PETSC_FALSE;
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNC__
1380 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1381 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1382 {
1383   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1384 
1385   PetscFunctionBegin;
1386   *bs = baij->bs;
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #undef __FUNC__
1391 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1392 int MatZeroEntries_MPIBAIJ(Mat A)
1393 {
1394   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1395   int         ierr;
1396 
1397   PetscFunctionBegin;
1398   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1399   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNC__
1404 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1405 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1406 {
1407   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1408   Mat         A = a->A, B = a->B;
1409   int         ierr;
1410   double      isend[5], irecv[5];
1411 
1412   PetscFunctionBegin;
1413   info->block_size     = (double)a->bs;
1414   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1415   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
1416   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1417   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
1418   if (flag == MAT_LOCAL) {
1419     info->nz_used      = isend[0];
1420     info->nz_allocated = isend[1];
1421     info->nz_unneeded  = isend[2];
1422     info->memory       = isend[3];
1423     info->mallocs      = isend[4];
1424   } else if (flag == MAT_GLOBAL_MAX) {
1425     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr);
1426     info->nz_used      = irecv[0];
1427     info->nz_allocated = irecv[1];
1428     info->nz_unneeded  = irecv[2];
1429     info->memory       = irecv[3];
1430     info->mallocs      = irecv[4];
1431   } else if (flag == MAT_GLOBAL_SUM) {
1432     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr);
1433     info->nz_used      = irecv[0];
1434     info->nz_allocated = irecv[1];
1435     info->nz_unneeded  = irecv[2];
1436     info->memory       = irecv[3];
1437     info->mallocs      = irecv[4];
1438   }
1439   info->rows_global       = (double)a->M;
1440   info->columns_global    = (double)a->N;
1441   info->rows_local        = (double)a->m;
1442   info->columns_local     = (double)a->N;
1443   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1444   info->fill_ratio_needed = 0;
1445   info->factor_mallocs    = 0;
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNC__
1450 #define __FUNC__ "MatSetOption_MPIBAIJ"
1451 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1452 {
1453   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1454 
1455   PetscFunctionBegin;
1456   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1457       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1458       op == MAT_COLUMNS_UNSORTED ||
1459       op == MAT_COLUMNS_SORTED ||
1460       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1461       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1462         MatSetOption(a->A,op);
1463         MatSetOption(a->B,op);
1464   } else if (op == MAT_ROW_ORIENTED) {
1465         a->roworiented = 1;
1466         MatSetOption(a->A,op);
1467         MatSetOption(a->B,op);
1468   } else if (op == MAT_ROWS_SORTED ||
1469              op == MAT_ROWS_UNSORTED ||
1470              op == MAT_SYMMETRIC ||
1471              op == MAT_STRUCTURALLY_SYMMETRIC ||
1472              op == MAT_YES_NEW_DIAGONALS ||
1473              op == MAT_USE_HASH_TABLE)
1474     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1475   else if (op == MAT_COLUMN_ORIENTED) {
1476     a->roworiented = 0;
1477     MatSetOption(a->A,op);
1478     MatSetOption(a->B,op);
1479   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1480     a->donotstash = 1;
1481   } else if (op == MAT_NO_NEW_DIAGONALS) {
1482     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1483   } else if (op == MAT_USE_HASH_TABLE) {
1484     a->ht_flag = 1;
1485   } else {
1486     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1487   }
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNC__
1492 #define __FUNC__ "MatTranspose_MPIBAIJ("
1493 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1494 {
1495   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1496   Mat_SeqBAIJ *Aloc;
1497   Mat        B;
1498   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1499   int        bs=baij->bs,mbs=baij->mbs;
1500   Scalar     *a;
1501 
1502   PetscFunctionBegin;
1503   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1504   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1505   CHKERRQ(ierr);
1506 
1507   /* copy over the A part */
1508   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1509   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1510   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1511 
1512   for ( i=0; i<mbs; i++ ) {
1513     rvals[0] = bs*(baij->rstart + i);
1514     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1515     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1516       col = (baij->cstart+aj[j])*bs;
1517       for (k=0; k<bs; k++ ) {
1518         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1519         col++; a += bs;
1520       }
1521     }
1522   }
1523   /* copy over the B part */
1524   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1525   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1526   for ( i=0; i<mbs; i++ ) {
1527     rvals[0] = bs*(baij->rstart + i);
1528     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1529     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1530       col = baij->garray[aj[j]]*bs;
1531       for (k=0; k<bs; k++ ) {
1532         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1533         col++; a += bs;
1534       }
1535     }
1536   }
1537   PetscFree(rvals);
1538   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1539   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1540 
1541   if (matout != PETSC_NULL) {
1542     *matout = B;
1543   } else {
1544     PetscOps       *Abops;
1545     struct _MatOps *Aops;
1546 
1547     /* This isn't really an in-place transpose .... but free data structures from baij */
1548     PetscFree(baij->rowners);
1549     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1550     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1551     if (baij->colmap) PetscFree(baij->colmap);
1552     if (baij->garray) PetscFree(baij->garray);
1553     if (baij->lvec) VecDestroy(baij->lvec);
1554     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1555     PetscFree(baij);
1556 
1557     /*
1558        This is horrible, horrible code. We need to keep the
1559       A pointers for the bops and ops but copy everything
1560       else from C.
1561     */
1562     Abops = A->bops;
1563     Aops  = A->ops;
1564     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1565     A->bops = Abops;
1566     A->ops  = Aops;
1567 
1568     PetscHeaderDestroy(B);
1569   }
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNC__
1574 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1575 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1576 {
1577   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1578   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1579   int ierr,s1,s2,s3;
1580 
1581   PetscFunctionBegin;
1582   if (ll)  {
1583     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1584     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1585     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1586     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1587     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1588   }
1589   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNC__
1594 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1595 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1596 {
1597   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1598   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1599   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1600   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1601   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1602   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1603   MPI_Comm       comm = A->comm;
1604   MPI_Request    *send_waits,*recv_waits;
1605   MPI_Status     recv_status,*send_status;
1606   IS             istmp;
1607   PetscTruth     localdiag;
1608 
1609   PetscFunctionBegin;
1610   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1611   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1612 
1613   /*  first count number of contributors to each processor */
1614   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1615   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1616   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1617   for ( i=0; i<N; i++ ) {
1618     idx = rows[i];
1619     found = 0;
1620     for ( j=0; j<size; j++ ) {
1621       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1622         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1623       }
1624     }
1625     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1626   }
1627   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1628 
1629   /* inform other processors of number of messages and max length*/
1630   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1631   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1632   nrecvs = work[rank];
1633   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1634   nmax   = work[rank];
1635   PetscFree(work);
1636 
1637   /* post receives:   */
1638   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1639   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1640   for ( i=0; i<nrecvs; i++ ) {
1641     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1642   }
1643 
1644   /* do sends:
1645      1) starts[i] gives the starting index in svalues for stuff going to
1646      the ith processor
1647   */
1648   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1649   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1650   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1651   starts[0] = 0;
1652   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1653   for ( i=0; i<N; i++ ) {
1654     svalues[starts[owner[i]]++] = rows[i];
1655   }
1656   ISRestoreIndices(is,&rows);
1657 
1658   starts[0] = 0;
1659   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1660   count = 0;
1661   for ( i=0; i<size; i++ ) {
1662     if (procs[i]) {
1663       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1664     }
1665   }
1666   PetscFree(starts);
1667 
1668   base = owners[rank]*bs;
1669 
1670   /*  wait on receives */
1671   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1672   source = lens + nrecvs;
1673   count  = nrecvs; slen = 0;
1674   while (count) {
1675     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1676     /* unpack receives into our local space */
1677     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1678     source[imdex]  = recv_status.MPI_SOURCE;
1679     lens[imdex]  = n;
1680     slen += n;
1681     count--;
1682   }
1683   PetscFree(recv_waits);
1684 
1685   /* move the data into the send scatter */
1686   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1687   count = 0;
1688   for ( i=0; i<nrecvs; i++ ) {
1689     values = rvalues + i*nmax;
1690     for ( j=0; j<lens[i]; j++ ) {
1691       lrows[count++] = values[j] - base;
1692     }
1693   }
1694   PetscFree(rvalues); PetscFree(lens);
1695   PetscFree(owner); PetscFree(nprocs);
1696 
1697   /* actually zap the local rows */
1698   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1699   PLogObjectParent(A,istmp);
1700 
1701   /*
1702         Zero the required rows. If the "diagonal block" of the matrix
1703      is square and the user wishes to set the diagonal we use seperate
1704      code so that MatSetValues() is not called for each diagonal allocating
1705      new memory, thus calling lots of mallocs and slowing things down.
1706 
1707        Contributed by: Mathew Knepley
1708   */
1709   localdiag = PETSC_FALSE;
1710   if (diag && (l->A->M == l->A->N)) {
1711     localdiag = PETSC_TRUE;
1712     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1713   } else {
1714     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1715   }
1716   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1717   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1718 
1719   if (diag && (localdiag == PETSC_FALSE)) {
1720     for ( i = 0; i < slen; i++ ) {
1721       row = lrows[i] + rstart_bs;
1722       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1723     }
1724     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1725     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1726   }
1727   PetscFree(lrows);
1728 
1729   /* wait on sends */
1730   if (nsends) {
1731     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1732     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1733     PetscFree(send_status);
1734   }
1735   PetscFree(send_waits); PetscFree(svalues);
1736 
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 extern int MatPrintHelp_SeqBAIJ(Mat);
1741 #undef __FUNC__
1742 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1743 int MatPrintHelp_MPIBAIJ(Mat A)
1744 {
1745   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1746   MPI_Comm    comm = A->comm;
1747   static int  called = 0;
1748   int         ierr;
1749 
1750   PetscFunctionBegin;
1751   if (!a->rank) {
1752     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1753   }
1754   if (called) {PetscFunctionReturn(0);} else called = 1;
1755   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1756   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNC__
1761 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1762 int MatSetUnfactored_MPIBAIJ(Mat A)
1763 {
1764   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1765   int         ierr;
1766 
1767   PetscFunctionBegin;
1768   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1769   PetscFunctionReturn(0);
1770 }
1771 
/* forward declaration: the definition appears after MatCreateMPIBAIJ below */
static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);

/* -------------------------------------------------------------------*/
/*
   Function table for the MATMPIBAIJ matrix type.  The entries are
   positional: they must match the slot order of struct _MatOps in this
   PETSc version.  A 0 marks an operation with no MPIBAIJ implementation.
*/
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
  0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
1790 
1791 
#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  /* on one process fall back to a sequential BAIJ matrix; note that the
     off-diagonal preallocation parameters o_nz/o_nnz are ignored here
     (there is no off-diagonal part in the single-process case) */
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* validate the size arguments: per-row preallocation needs the local
     row count, and at least one of each local/global pair must be given */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes take
     precedence: discard the global size and recompute it by summation */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* split the block rows/columns as evenly as possible; the first
     (Mbs % size) processes get one extra block row */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* rowners becomes a prefix sum: rowners[p]..rowners[p+1] are the block
     rows owned by process p */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* A holds the local diagonal block (m x n); B holds the off-diagonal
     entries and is created with the full global column width N */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optionally enable hash-table assembly; a factor <= 1.0 (or none
     given) falls back to the default 1.39 */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  PetscFunctionReturn(0);
}
2030 
2031 #undef __FUNC__
2032 #define __FUNC__ "MatConvertSameType_MPIBAIJ"
2033 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
2034 {
2035   Mat         mat;
2036   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2037   int         ierr, len=0, flg;
2038 
2039   PetscFunctionBegin;
2040   *newmat       = 0;
2041   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
2042   PLogObjectCreate(mat);
2043   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2044   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
2045   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2046   mat->ops->view       = MatView_MPIBAIJ;
2047   mat->factor     = matin->factor;
2048   mat->assembled  = PETSC_TRUE;
2049 
2050   a->m = mat->m   = oldmat->m;
2051   a->n = mat->n   = oldmat->n;
2052   a->M = mat->M   = oldmat->M;
2053   a->N = mat->N   = oldmat->N;
2054 
2055   a->bs  = oldmat->bs;
2056   a->bs2 = oldmat->bs2;
2057   a->mbs = oldmat->mbs;
2058   a->nbs = oldmat->nbs;
2059   a->Mbs = oldmat->Mbs;
2060   a->Nbs = oldmat->Nbs;
2061 
2062   a->rstart       = oldmat->rstart;
2063   a->rend         = oldmat->rend;
2064   a->cstart       = oldmat->cstart;
2065   a->cend         = oldmat->cend;
2066   a->size         = oldmat->size;
2067   a->rank         = oldmat->rank;
2068   mat->insertmode = NOT_SET_VALUES;
2069   a->rowvalues    = 0;
2070   a->getrowactive = PETSC_FALSE;
2071   a->barray       = 0;
2072 
2073   /* hash table stuff */
2074   a->ht           = 0;
2075   a->hd           = 0;
2076   a->ht_size      = 0;
2077   a->ht_flag      = oldmat->ht_flag;
2078   a->ht_fact      = oldmat->ht_fact;
2079   a->ht_total_ct  = 0;
2080   a->ht_insert_ct = 0;
2081 
2082 
2083   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2084   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2085   a->cowners = a->rowners + a->size + 2;
2086   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2087   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
2088   if (oldmat->colmap) {
2089     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2090     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2091     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2092   } else a->colmap = 0;
2093   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2094     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2095     PLogObjectMemory(mat,len*sizeof(int));
2096     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2097   } else a->garray = 0;
2098 
2099   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2100   PLogObjectParent(mat,a->lvec);
2101   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2102   PLogObjectParent(mat,a->Mvctx);
2103   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
2104   PLogObjectParent(mat,a->A);
2105   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
2106   PLogObjectParent(mat,a->B);
2107   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2108   if (flg) {
2109     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2110   }
2111   *newmat = mat;
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 #include "sys.h"
2116 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
   Loads a matrix from a PETSc binary viewer into a new MATMPIBAIJ matrix.
   Process 0 reads the header, row lengths, column indices and values from
   the file and ships each process its portion with MPI messages; extra
   padding rows (with a unit diagonal) are added when the global size is
   not divisible by the block size.  Only square matrices are supported.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* only process 0 touches the file; header[] = cookie, M, N, format */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows; the first (Mbs % size) processes
     get one extra block row.  browners[] holds the same prefix sums in
     point (non-block) row units */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* NOTE(review): other allocations in this file use a +1 guard for
     possibly-zero sizes; this one does not -- verify a process owning
     zero rows cannot occur here, or that PetscMalloc(0) is acceptable */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    /* each padding row contains exactly one entry (its diagonal) */
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* with one process the padding rows live here, so read that many
       fewer entries from disk and append the padding diagonals */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already counted for the current block row,
     masked1/masked2 remember which marks to clear afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    /* padding rows get a unit diagonal so the matrix stays nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if ( size != 1 ){
      /* here i == size-1, left over from the loop above */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2344 
2345 
2346 
2347 #undef __FUNC__
2348 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2349 /*@
2350    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2351 
2352    Input Parameters:
2353 .  mat  - the matrix
2354 .  fact - factor
2355 
2356    Collective on Mat
2357 
2358   Notes:
2359    This can also be set by the command line option: -mat_use_hash_table fact
2360 
2361 .keywords: matrix, hashtable, factor, HT
2362 
2363 .seealso: MatSetOption()
2364 @*/
2365 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2366 {
2367   Mat_MPIBAIJ *baij;
2368 
2369   PetscFunctionBegin;
2370   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2371   if (mat->type != MATMPIBAIJ) {
2372       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2373   }
2374   baij = (Mat_MPIBAIJ*) mat->data;
2375   baij->ht_fact = fact;
2376   PetscFunctionReturn(0);
2377 }
2378