xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c31ba22a230fe2c23596c66917081dfec3ccf152)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.111 1998/03/04 16:07:09 balay Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 
15 /*
16      Local utility routine that creates a mapping from the global column
17    number to the local number in the off-diagonal part of the local
18    storage of the matrix.  This is done in a non-scalable way, since the
19    length of colmap equals the number of global block columns.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
23 static int CreateColmap_MPIBAIJ_Private(Mat mat)
24 {
25   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
26   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27   int         nbs = B->nbs,i,bs=B->bs;
28 
29   PetscFunctionBegin;
30   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
31   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
32   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
34   PetscFunctionReturn(0);
35 }
36 
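/*
     Usage sketch (illustrative only, not part of the library): once colmap
   has been built, a global scalar column gcol is translated into a local
   column of the off-diagonal part B as in the hypothetical helper below.
   CreateColmap stores the local offset plus one, so a zero entry means the
   block column is absent from B.
*/
#if 0
static int ColmapLookupExample(Mat_MPIBAIJ *baij,int bs,int gcol,int *lcol)
{
  int gbcol = gcol/bs;                        /* global block column */
  if (!baij->colmap[gbcol]) return 0;         /* block column not in B */
  *lcol = baij->colmap[gbcol] - 1 + gcol%bs;  /* local scalar column in B */
  return 1;
}
#endif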
37 #define CHUNKSIZE  10
38 
39 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
40 { \
41  \
42     brow = row/bs;  \
43     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
44     rmax = aimax[brow]; nrow = ailen[brow]; \
45       bcol = col/bs; \
46       ridx = row % bs; cidx = col % bs; \
47       low = 0; high = nrow; \
48       while (high-low > 3) { \
49         t = (low+high)/2; \
50         if (rp[t] > bcol) high = t; \
51         else              low  = t; \
52       } \
53       for ( _i=low; _i<high; _i++ ) { \
54         if (rp[_i] > bcol) break; \
55         if (rp[_i] == bcol) { \
56           bap  = ap +  bs2*_i + bs*cidx + ridx; \
57           if (addv == ADD_VALUES) *bap += value;  \
58           else                    *bap  = value;  \
59           goto a_noinsert; \
60         } \
61       } \
62       if (a->nonew == 1) goto a_noinsert; \
63       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
64       if (nrow >= rmax) { \
65         /* there is no extra room in row, therefore enlarge */ \
66         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
67         Scalar *new_a; \
68  \
69         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
70  \
71         /* malloc new storage space */ \
72         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
73         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
74         new_j   = (int *) (new_a + bs2*new_nz); \
75         new_i   = new_j + new_nz; \
76  \
77         /* copy over old data into new slots */ \
78         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
79         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
80         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
81         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
82         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
83                                                            len*sizeof(int)); \
84         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
85         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
86         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
87                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
88         /* free up old matrix storage */ \
89         PetscFree(a->a);  \
90         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
91         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
92         a->singlemalloc = 1; \
93  \
94         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
96         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
97         a->maxnz += bs2*CHUNKSIZE; \
98         a->reallocs++; \
99         a->nz++; \
100       } \
101       N = nrow++ - 1;  \
102       /* shift up all the later entries in this row */ \
103       for ( ii=N; ii>=_i; ii-- ) { \
104         rp[ii+1] = rp[ii]; \
105         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
106       } \
107       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
108       rp[_i]                      = bcol;  \
109       ap[bs2*_i + bs*cidx + ridx] = value;  \
110       a_noinsert:; \
111     ailen[brow] = nrow; \
112 }
113 
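/*
     Layout note (illustrative): within each bs x bs block the values are
   stored in column-major order, which is why the macros above and below
   address scalar (ridx,cidx) of the _i-th block of a block row as
   ap[bs2*_i + bs*cidx + ridx].  For bs = 2 the _i-th block is laid out as

        ap[4*_i+0]  ap[4*_i+2]     <- ridx = 0
        ap[4*_i+1]  ap[4*_i+3]     <- ridx = 1
*/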
114 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
115 { \
116  \
117     brow = row/bs;  \
118     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
119     rmax = bimax[brow]; nrow = bilen[brow]; \
120       bcol = col/bs; \
121       ridx = row % bs; cidx = col % bs; \
122       low = 0; high = nrow; \
123       while (high-low > 3) { \
124         t = (low+high)/2; \
125         if (rp[t] > bcol) high = t; \
126         else              low  = t; \
127       } \
128       for ( _i=low; _i<high; _i++ ) { \
129         if (rp[_i] > bcol) break; \
130         if (rp[_i] == bcol) { \
131           bap  = ap +  bs2*_i + bs*cidx + ridx; \
132           if (addv == ADD_VALUES) *bap += value;  \
133           else                    *bap  = value;  \
134           goto b_noinsert; \
135         } \
136       } \
137       if (b->nonew == 1) goto b_noinsert; \
138       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
139       if (nrow >= rmax) { \
140         /* there is no extra room in row, therefore enlarge */ \
141         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
142         Scalar *new_a; \
143  \
144         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
145  \
146         /* malloc new storage space */ \
147         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
148         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
149         new_j   = (int *) (new_a + bs2*new_nz); \
150         new_i   = new_j + new_nz; \
151  \
152         /* copy over old data into new slots */ \
153         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
154         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
156         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
157         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
158                                                            len*sizeof(int)); \
159         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
160         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
161         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
162                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
163         /* free up old matrix storage */ \
164         PetscFree(b->a);  \
165         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
166         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
167         b->singlemalloc = 1; \
168  \
169         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
170         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
171         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
172         b->maxnz += bs2*CHUNKSIZE; \
173         b->reallocs++; \
174         b->nz++; \
175       } \
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for ( ii=N; ii>=_i; ii-- ) { \
179         rp[ii+1] = rp[ii]; \
180         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
181       } \
182       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
188 
189 #undef __FUNC__
190 #define __FUNC__ "MatSetValues_MPIBAIJ"
191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
192 {
193   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
194   Scalar      value;
195   int         ierr,i,j,row,col;
196   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs;
197   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
198   int         cend_orig=baij->cend_bs,bs=baij->bs;
199 
200   /* Some Variables required in the macro */
201   Mat         A = baij->A;
202   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
203   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204   Scalar      *aa=a->a;
205 
206   Mat         B = baij->B;
207   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
208   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209   Scalar      *ba=b->a;
210 
211   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
212   int         low,high,t,ridx,cidx,bs2=a->bs2;
213   Scalar      *ap,*bap;
214 
215   PetscFunctionBegin;
216   for ( i=0; i<m; i++ ) {
217 #if defined(USE_PETSC_BOPT_g)
218     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
219     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220 #endif
221     if (im[i] >= rstart_orig && im[i] < rend_orig) {
222       row = im[i] - rstart_orig;
223       for ( j=0; j<n; j++ ) {
224         if (in[j] >= cstart_orig && in[j] < cend_orig){
225           col = in[j] - cstart_orig;
226           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         }
230 #if defined(USE_PETSC_BOPT_g)
231         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
232         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239             col = baij->colmap[in[j]/bs] ? baij->colmap[in[j]/bs] - 1 + in[j]%bs : -1;
240             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
241               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
244               B = baij->B;
245               b = (Mat_SeqBAIJ *) (B)->data;
246               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
247               ba=b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     } else {
256       if (roworiented && !baij->donotstash) {
257         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
258       } else {
259         if (!baij->donotstash) {
260           row = im[i];
261           for ( j=0; j<n; j++ ) {
262             ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
263           }
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
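/*
     Usage sketch (illustrative only, assuming a Mat mat and int ierr in
   scope): MatSetValues_MPIBAIJ above takes *scalar* row/column indices,
   while MatSetValuesBlocked_MPIBAIJ below takes *block* indices with bs*bs
   values per block.  With bs = 2 and the default row-oriented ordering,
   the same block can be set either way:
*/
#if 0
   {
     int    rows[2] = {0,1},cols[2] = {0,1},brow = 0,bcol = 0;
     Scalar vals[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     /* or equivalently, one 2x2 block at block position (0,0): */
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
   }
#endif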
271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272 #undef __FUNC__
273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275 {
276   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
277   Scalar      *value,*barray=baij->barray;
278   int         ierr,i,j,ii,jj,row,col,k,l;
279   int         roworiented = baij->roworiented,rstart=baij->rstart;
280   int         rend=baij->rend,cstart=baij->cstart,stepval;
281   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282 
  PetscFunctionBegin;
283   if (!barray) {
284     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
285   }
286 
287   if (roworiented) {
288     stepval = (n-1)*bs;
289   } else {
290     stepval = (m-1)*bs;
291   }
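  /*
     Indexing note (illustrative): for row-oriented input, v is an (m*bs) x
     (n*bs) dense array stored by rows, so scalar (ii,jj) of logical block
     (i,j) sits at v[(i*bs+ii)*(n*bs) + j*bs + jj].  stepval = (n-1)*bs is
     the jump from the end of one bs-long row segment of a block to the
     start of the next, which is what the copy loop below adds to value
     after each block row.
  */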
292   for ( i=0; i<m; i++ ) {
293 #if defined(USE_PETSC_BOPT_g)
294     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296 #endif
297     if (im[i] >= rstart && im[i] < rend) {
298       row = im[i] - rstart;
299       for ( j=0; j<n; j++ ) {
300         /* if n == 1 (row-oriented) or m == 1 (column-oriented) the input block can be used in place; no copy is required */
301         if ((roworiented) && (n == 1)) {
302           barray = v + i*bs2;
303         } else if((!roworiented) && (m == 1)) {
304           barray = v + j*bs2;
305         } else { /* Here a copy is required */
306           if (roworiented) {
307             value = v + i*(stepval+bs)*bs + j*bs;
308           } else {
309             value = v + j*(stepval+bs)*bs + i*bs;
310           }
311           for ( ii=0; ii<bs; ii++,value+=stepval ) {
312             for (jj=0; jj<bs; jj++ ) {
313               *barray++  = *value++;
314             }
315           }
316           barray -=bs2;
317         }
318 
319         if (in[j] >= cstart && in[j] < cend){
320           col  = in[j] - cstart;
321           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322         }
323 #if defined(USE_PETSC_BOPT_g)
324         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326 #endif
327         else {
328           if (mat->was_assembled) {
329             if (!baij->colmap) {
330               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331             }
332 
333 #if defined(USE_PETSC_BOPT_g)
334             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335 #endif
336             col = baij->colmap[in[j]] ? (baij->colmap[in[j]] - 1)/bs : -1;
337             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339               col =  in[j];
340             }
341           }
342           else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (!baij->donotstash) {
348         if (roworiented ) {
349           row   = im[i]*bs;
350           value = v + i*(stepval+bs)*bs;
351           for ( j=0; j<bs; j++,row++ ) {
352             for ( k=0; k<n; k++ ) {
353               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355               }
356             }
357           }
358         } else {
359           for ( j=0; j<n; j++ ) {
360             value = v + j*(stepval+bs)*bs + i*bs;
361             col   = in[j]*bs;
362             for ( k=0; k<bs; k++,col++,value+=stepval) {
363               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365               }
366             }
367           }
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 #include <math.h>
375 #define HASH_KEY 0.6180339887
376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
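/*
   HASH is a multiplicative hash of the 1-based block key
   (brow*Nbs + bcol + 1) into [0,size).  The routines below resolve
   collisions by linear probing, wrapping once around the table; a sketch
   of that lookup (illustrative only, hypothetical helper mirroring the
   loops used below):
*/
#if 0
static int HashLookupExample(int *HT,int size,int key)
{
  double tmp;
  int    idx,h1 = HASH(size,key,tmp);
  for ( idx=h1; idx<size; idx++ ) if (HT[idx] == key) return idx;
  for ( idx=0;  idx<h1;   idx++ ) if (HT[idx] == key) return idx;
  return -1; /* (row,col) has no entry in the hash table */
}
#endif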
379 #undef __FUNC__
380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
382 {
383   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
384   int         ierr,i,j,row,col;
385   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs;
386   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
387   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
388   double      tmp;
389   Scalar      ** HD = baij->hd,value;
390 #if defined(USE_PETSC_BOPT_g)
391   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
392 #endif
393 
394   PetscFunctionBegin;
395 
396   for ( i=0; i<m; i++ ) {
397 #if defined(USE_PETSC_BOPT_g)
398     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
399     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
400 #endif
401     row = im[i];
402     if (row >= rstart_orig && row < rend_orig) {
403       for ( j=0; j<n; j++ ) {
404         col = in[j];
405         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
406         /* Look up into the Hash Table */
407         key = (row/bs)*Nbs+(col/bs)+1;
408         h1  = HASH(size,key,tmp);
409 
410 
411         idx = h1;
412 #if defined(USE_PETSC_BOPT_g)
413         insert_ct++;
414         total_ct++;
415         if (HT[idx] != key) {
416           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
417           if (idx == size) {
418             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
419             if (idx == h1) {
420               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
421             }
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
427           if (idx == size) {
428             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
429             if (idx == h1) {
430               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
431             }
432           }
433         }
434 #endif
435         /* A HASH table entry is found, so insert the values at the correct address */
436         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
438       }
439     } else {
440       if (roworiented && !baij->donotstash) {
441         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
442       } else {
443         if (!baij->donotstash) {
444           row = im[i];
445           for ( j=0; j<n; j++ ) {
446             ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
447           }
448         }
449       }
450     }
451   }
452 #if defined(USE_PETSC_BOPT_g)
453   baij->ht_total_ct = total_ct;
454   baij->ht_insert_ct = insert_ct;
455 #endif
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNC__
460 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
461 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
462 {
463   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
464   int         ierr,i,j,ii,jj,row,col,k,l;
465   int         roworiented = baij->roworiented,rstart=baij->rstart ;
466   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
467   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
468   double      tmp;
469   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
470 #if defined(USE_PETSC_BOPT_g)
471   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
472 #endif
473 
474   PetscFunctionBegin;
475 
476   if (roworiented) {
477     stepval = (n-1)*bs;
478   } else {
479     stepval = (m-1)*bs;
480   }
481   for ( i=0; i<m; i++ ) {
482 #if defined(USE_PETSC_BOPT_g)
483     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
484     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
485 #endif
486     row   = im[i];
487     v_t   = v + i*bs2;
488     if (row >= rstart && row < rend) {
489       for ( j=0; j<n; j++ ) {
490         col = in[j];
491 
492         /* Look up into the Hash Table */
493         key = row*Nbs+col+1;
494         h1  = HASH(size,key,tmp);
495 
496         idx = h1;
497 #if defined(USE_PETSC_BOPT_g)
498         total_ct++;
499         insert_ct++;
500         if (HT[idx] != key) {
501           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
502           if (idx == size) {
503             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
504             if (idx == h1) {
505               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
506             }
507           }
508         }
509 #else
510         if (HT[idx] != key) {
511           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
512           if (idx == size) {
513             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
514             if (idx == h1) {
515               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
516             }
517           }
518         }
519 #endif
520         baij_a = HD[idx];
521         if (roworiented) {
522           /*value = v + i*(stepval+bs)*bs + j*bs;*/
523           /* value = v + (i*(stepval+bs)+j)*bs; */
524           value = v_t;
525           v_t  += bs;
526           if (addv == ADD_VALUES) {
527             for ( ii=0; ii<bs; ii++,value+=stepval) {
528               for ( jj=ii; jj<bs2; jj+=bs ) {
529                 baij_a[jj]  += *value++;
530               }
531             }
532           } else {
533             for ( ii=0; ii<bs; ii++,value+=stepval) {
534               for ( jj=ii; jj<bs2; jj+=bs ) {
535                 baij_a[jj]  = *value++;
536               }
537             }
538           }
539         } else {
540           value = v + j*(stepval+bs)*bs + i*bs;
541           if (addv == ADD_VALUES) {
542             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
543               for ( jj=0; jj<bs; jj++ ) {
544                 baij_a[jj]  += *value++;
545               }
546             }
547           } else {
548             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
549               for ( jj=0; jj<bs; jj++ ) {
550                 baij_a[jj]  = *value++;
551               }
552             }
553           }
554         }
555       }
556     } else {
557       if (!baij->donotstash) {
558         if (roworiented ) {
559           row   = im[i]*bs;
560           value = v + i*(stepval+bs)*bs;
561           for ( j=0; j<bs; j++,row++ ) {
562             for ( k=0; k<n; k++ ) {
563               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
564                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
565               }
566             }
567           }
568         } else {
569           for ( j=0; j<n; j++ ) {
570             value = v + j*(stepval+bs)*bs + i*bs;
571             col   = in[j]*bs;
572             for ( k=0; k<bs; k++,col++,value+=stepval) {
573               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
574                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
575               }
576             }
577           }
578         }
579       }
580     }
581   }
582 #if defined(USE_PETSC_BOPT_g)
583   baij->ht_total_ct = total_ct;
584   baij->ht_insert_ct = insert_ct;
585 #endif
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNC__
590 #define __FUNC__ "MatGetValues_MPIBAIJ"
591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
592 {
593   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
594   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
595   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
596 
597   PetscFunctionBegin;
598   for ( i=0; i<m; i++ ) {
599     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
600     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
601     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
602       row = idxm[i] - bsrstart;
603       for ( j=0; j<n; j++ ) {
604         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
605         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
606         if (idxn[j] >= bscstart && idxn[j] < bscend){
607           col = idxn[j] - bscstart;
608           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
609         } else {
610           if (!baij->colmap) {
611             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
612           }
613           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
614              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
615           else {
616             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
617             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
618           }
619         }
620       }
621     } else {
622       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatNorm_MPIBAIJ"
630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
631 {
632   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
633   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
634   int        ierr, i,bs2=baij->bs2;
635   double     sum = 0.0;
636   Scalar     *v;
637 
638   PetscFunctionBegin;
639   if (baij->size == 1) {
640     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
641   } else {
642     if (type == NORM_FROBENIUS) {
643       v = amat->a;
644       for (i=0; i<amat->nz*bs2; i++ ) {
645 #if defined(USE_PETSC_COMPLEX)
646         sum += real(conj(*v)*(*v)); v++;
647 #else
648         sum += (*v)*(*v); v++;
649 #endif
650       }
651       v = bmat->a;
652       for (i=0; i<bmat->nz*bs2; i++ ) {
653 #if defined(USE_PETSC_COMPLEX)
654         sum += real(conj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
660       *norm = sqrt(*norm);
661     } else {
662       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
663     }
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
670 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
673   MPI_Comm    comm = mat->comm;
674   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
675   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
676   MPI_Request *send_waits,*recv_waits;
677   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
678   InsertMode  addv;
679   Scalar      *rvalues,*svalues;
680 
681   PetscFunctionBegin;
682   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
683   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
684   if (addv == (ADD_VALUES|INSERT_VALUES)) {
685     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
686   }
687   mat->insertmode = addv; /* in case this processor had no cache */
688 
689   /*  first count number of contributors to each processor */
690   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
691   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
692   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
693   for ( i=0; i<baij->stash.n; i++ ) {
694     idx = baij->stash.idx[i];
695     for ( j=0; j<size; j++ ) {
696       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
697         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
698       }
699     }
700   }
701   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
702 
703   /* inform other processors of number of messages and max length*/
704   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
705   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
706   nreceives = work[rank];
707   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
708   nmax      = work[rank];
709   PetscFree(work);
710 
711   /* post receives:
712        1) each message will consist of ordered triples
713      (global row index, global column index, value); we store the
714      indices as Scalars to simplify the message passing.
715        2) since we don't know how long each individual message is we
716      allocate the largest needed buffer for each receive. Potentially
717      this is a lot of wasted space.
718 
719 
720        This could be done better.
721   */
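  /*
     Packing note (illustrative): stashed entry k destined for a given
     processor occupies three consecutive Scalars in the send buffer,

        svalues[3*k]   = (Scalar) global row index
        svalues[3*k+1] = (Scalar) global column index
        svalues[3*k+2] = value

     which is why both buffers here are sized in multiples of 3.
  */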
722   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
723   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
724   for ( i=0; i<nreceives; i++ ) {
725     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
726   }
727 
728   /* do sends:
729       1) starts[i] gives the starting index in svalues for stuff going to
730          the ith processor
731   */
732   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
733   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
734   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
735   starts[0] = 0;
736   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
737   for ( i=0; i<baij->stash.n; i++ ) {
738     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
739     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
740     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
741   }
742   PetscFree(owner);
743   starts[0] = 0;
744   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
745   count = 0;
746   for ( i=0; i<size; i++ ) {
747     if (procs[i]) {
748       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
749     }
750   }
751   PetscFree(starts); PetscFree(nprocs);
752 
753   /* Free cache space */
754   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
755   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
756 
757   baij->svalues    = svalues;    baij->rvalues    = rvalues;
758   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
759   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
760   baij->rmax       = nmax;
761 
762   PetscFunctionReturn(0);
763 }
764 
765 /*
766   Creates the hash table and fills it with the matrix entries.
767   This table is created only once.
768   If new entries need to be added to the matrix
769   then the hash table has to be destroyed and
770   recreated.
771 */
772 #undef __FUNC__
773 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
774 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
775 {
776   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
777   Mat         A = baij->A, B=baij->B;
778   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
779   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
780   int         size,bs2=baij->bs2,rstart=baij->rstart;
781   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
782   int         *HT,key;
783   Scalar      **HD;
784   double      tmp;
785 #if defined(USE_PETSC_BOPT_g)
786   int         ct=0,max=0;
787 #endif
788 
789   PetscFunctionBegin;
790   baij->ht_size=(int)(factor*nz);
791   size = baij->ht_size;
792 
793   if (baij->ht) {
794     PetscFunctionReturn(0);
795   }
796 
797   /* Allocate Memory for Hash Table */
798   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
799   baij->ht = (int*)(baij->hd + size);
800   HD = baij->hd;
801   HT = baij->ht;
802 
803 
804   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
805 
806 
807   /* Loop Over A */
808   for ( i=0; i<a->mbs; i++ ) {
809     for ( j=ai[i]; j<ai[i+1]; j++ ) {
810       row = i+rstart;
811       col = aj[j]+cstart;
812 
813       key = row*Nbs + col + 1;
814       h1  = HASH(size,key,tmp);
815       for ( k=0; k<size; k++ ){
816         if (HT[(h1+k)%size] == 0) {
817           HT[(h1+k)%size] = key;
818           HD[(h1+k)%size] = a->a + j*bs2;
819           break;
820 #if defined(USE_PETSC_BOPT_g)
821         } else {
822           ct++;
823 #endif
824         }
825       }
826 #if defined(USE_PETSC_BOPT_g)
827       if (k> max) max = k;
828 #endif
829     }
830   }
831   /* Loop Over B */
832   for ( i=0; i<b->mbs; i++ ) {
833     for ( j=bi[i]; j<bi[i+1]; j++ ) {
834       row = i+rstart;
835       col = garray[bj[j]];
836       key = row*Nbs + col + 1;
837       h1  = HASH(size,key,tmp);
838       for ( k=0; k<size; k++ ){
839         if (HT[(h1+k)%size] == 0) {
840           HT[(h1+k)%size] = key;
841           HD[(h1+k)%size] = b->a + j*bs2;
842           break;
843 #if defined(USE_PETSC_BOPT_g)
844         } else {
845           ct++;
846 #endif
847         }
848       }
849 #if defined(USE_PETSC_BOPT_g)
850       if (k> max) max = k;
851 #endif
852     }
853   }
854 
855   /* Print Summary */
856 #if defined(USE_PETSC_BOPT_g)
857   for ( i=0,j=0; i<size; i++)
858     if (HT[i]) {j++;}
859   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
860            (j== 0)? 0.0:((double)(ct+j))/j,max);
861 #endif
862   PetscFunctionReturn(0);
863 }
864 
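/*
   Sizing note (illustrative): the table must hold nz = a->nz + b->nz block
   entries in ht_size = factor*nz slots, so 1/factor is the load factor of
   the table; a factor somewhat larger than 1 keeps the linear-probe chains
   short at the cost of extra memory.
*/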
865 #undef __FUNC__
866 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
867 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
868 {
869   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
870   MPI_Status  *send_status,recv_status;
871   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
872   int         bs=baij->bs,row,col,other_disassembled;
873   Scalar      *values,val;
874   InsertMode  addv = mat->insertmode;
875 
876   PetscFunctionBegin;
877   /*  wait on receives */
878   while (count) {
879     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
880     /* unpack receives into our local space */
881     values = baij->rvalues + 3*imdex*baij->rmax;
882     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
883     n = n/3;
884     for ( i=0; i<n; i++ ) {
885       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
886       col = (int) PetscReal(values[3*i+1]);
887       val = values[3*i+2];
888       if (col >= baij->cstart*bs && col < baij->cend*bs) {
889         col -= baij->cstart*bs;
890         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
891       } else {
892         if (mat->was_assembled) {
893           if (!baij->colmap) {
894             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
895           }
896           col = baij->colmap[col/bs] ? baij->colmap[col/bs] - 1 + col%bs : -1;
897           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
898             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
899             col = (int) PetscReal(values[3*i+1]);
900           }
901         }
902         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
903       }
904     }
905     count--;
906   }
907   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
908 
909   /* wait on sends */
910   if (baij->nsends) {
911     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
912     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
913     PetscFree(send_status);
914   }
915   PetscFree(baij->send_waits); PetscFree(baij->svalues);
916 
917   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
918   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
919 
920   /* determine if any processor has disassembled; if so we must
921      also disassemble ourselves, in order that we may reassemble. */
922   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
923   if (mat->was_assembled && !other_disassembled) {
924     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
925   }
926 
927   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
928     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
929   }
930   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
932 
933 #if defined(USE_PETSC_BOPT_g)
934   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
935     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
936              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
937     baij->ht_total_ct  = 0;
938     baij->ht_insert_ct = 0;
939   }
940 #endif
941   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
942     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
943     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
944     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
945   }
946 
947   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNC__
952 #define __FUNC__ "MatView_MPIBAIJ_Binary"
953 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
954 {
955   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
956   int          ierr;
957 
958   PetscFunctionBegin;
959   if (baij->size == 1) {
960     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
961   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNC__
966 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
967 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
968 {
969   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
970   int          ierr, format,rank,bs = baij->bs;
971   FILE         *fd;
972   ViewerType   vtype;
973 
974   PetscFunctionBegin;
975   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
976   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
977     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
978     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
979       MatInfo info;
980       MPI_Comm_rank(mat->comm,&rank);
981       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
982       ierr = MatGetInfo(mat,MAT_LOCAL,&info); CHKERRQ(ierr);
983       PetscSequentialPhaseBegin(mat->comm,1);
984       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
985               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
986               baij->bs,(int)info.memory);
987       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info); CHKERRQ(ierr);
988       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
989       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info); CHKERRQ(ierr);
990       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
991       fflush(fd);
992       PetscSequentialPhaseEnd(mat->comm,1);
993       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
994       PetscFunctionReturn(0);
995     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
996       PetscPrintf(mat->comm,"  block size is %d\n",bs);
997       PetscFunctionReturn(0);
998     }
999   }
1000 
1001   if (vtype == DRAW_VIEWER) {
1002     Draw       draw;
1003     PetscTruth isnull;
1004     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
1005     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1006   }
1007 
1008   if (vtype == ASCII_FILE_VIEWER) {
1009     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1010     PetscSequentialPhaseBegin(mat->comm,1);
1011     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
1012            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
1013             baij->cstart*bs,baij->cend*bs);
1014     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1015     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
1016     fflush(fd);
1017     PetscSequentialPhaseEnd(mat->comm,1);
1018   } else {
1019     int size = baij->size;
1020     rank = baij->rank;
1021     if (size == 1) {
1022       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1023     } else {
1024       /* assemble the entire matrix onto first processor. */
1025       Mat         A;
1026       Mat_SeqBAIJ *Aloc;
1027       int         M = baij->M, N = baij->N,*ai,*aj,row,col,i,j,k,*rvals;
1028       int         mbs=baij->mbs;
1029       Scalar      *a;
1030 
1031       if (!rank) {
1032         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1033       } else {
1034         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1035       }
1036       PLogObjectParent(mat,A);
1037 
1038       /* copy over the A part */
1039       Aloc = (Mat_SeqBAIJ*) baij->A->data;
1040       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1041       row = baij->rstart;
1042       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1043 
1044       for ( i=0; i<mbs; i++ ) {
1045         rvals[0] = bs*(baij->rstart + i);
1046         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1047         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1048           col = (baij->cstart+aj[j])*bs;
1049           for (k=0; k<bs; k++ ) {
1050             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1051             col++; a += bs;
1052           }
1053         }
1054       }
1055       /* copy over the B part */
1056       Aloc = (Mat_SeqBAIJ*) baij->B->data;
1057       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1058       row = baij->rstart*bs;
1059       for ( i=0; i<mbs; i++ ) {
1060         rvals[0] = bs*(baij->rstart + i);
1061         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1062         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1063           col = baij->garray[aj[j]]*bs;
1064           for (k=0; k<bs; k++ ) {
1065             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1066             col++; a += bs;
1067           }
1068         }
1069       }
1070       PetscFree(rvals);
1071       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1072       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1073       /*
1074          Everyone has to call to draw the matrix since the graphics waits are
1075          synchronized across all processors that share the Draw object
1076       */
1077       if (!rank || vtype == DRAW_VIEWER) {
1078         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1079       }
1080       ierr = MatDestroy(A); CHKERRQ(ierr);
1081     }
1082   }
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 
1087 
1088 #undef __FUNC__
1089 #define __FUNC__ "MatView_MPIBAIJ"
1090 int MatView_MPIBAIJ(PetscObject obj,Viewer viewer)
1091 {
1092   Mat         mat = (Mat) obj;
1093   int         ierr;
1094   ViewerType  vtype;
1095 
1096   PetscFunctionBegin;
1097   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1098   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1099       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1100     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1101   } else if (vtype == BINARY_FILE_VIEWER) {
1102     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1103   }
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNC__
1108 #define __FUNC__ "MatDestroy_MPIBAIJ"
1109 int MatDestroy_MPIBAIJ(PetscObject obj)
1110 {
1111   Mat         mat = (Mat) obj;
1112   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1113   int         ierr;
1114 
1115   PetscFunctionBegin;
1116 #if defined(USE_PETSC_LOG)
1117   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
1118 #endif
1119 
1120   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1121   PetscFree(baij->rowners);
1122   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1123   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1124   if (baij->colmap) PetscFree(baij->colmap);
1125   if (baij->garray) PetscFree(baij->garray);
1126   if (baij->lvec)   VecDestroy(baij->lvec);
1127   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1128   if (baij->rowvalues) PetscFree(baij->rowvalues);
1129   if (baij->barray) PetscFree(baij->barray);
1130   if (baij->hd) PetscFree(baij->hd);
1131   PetscFree(baij);
1132   PLogObjectDestroy(mat);
1133   PetscHeaderDestroy(mat);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNC__
1138 #define __FUNC__ "MatMult_MPIBAIJ"
1139 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1140 {
1141   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1142   int         ierr, nt;
1143 
1144   PetscFunctionBegin;
1145   VecGetLocalSize_Fast(xx,nt);
1146   if (nt != a->n) {
1147     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1148   }
1149   VecGetLocalSize_Fast(yy,nt);
1150   if (nt != a->m) {
1151     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
1152   }
1153   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1154   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1155   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1156   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1157   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNC__
1162 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1163 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1164 {
1165   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1166   int        ierr;
1167 
1168   PetscFunctionBegin;
1169   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1170   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1171   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1172   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNC__
1177 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1178 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1179 {
1180   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1181   int         ierr;
1182 
1183   PetscFunctionBegin;
1184   /* do nondiagonal part */
1185   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1186   /* send it on its way */
1187   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1188   /* do local part */
1189   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1190   /* receive remote parts: note this assumes the values are not actually */
1191   /* inserted in yy until the next line, which is true for this */
1192   /* implementation but may not always be true. */
1193   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
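/*
     Decomposition sketch (illustrative): with A_d the local diagonal block
   (a->A) and B_o the off-diagonal block (a->B), the multiply routines in
   this file compute

      y = A x    as   y = A_d x_loc + B_o x_ghost
      y = A' x   as   y = A_d' x_loc + scatter-add( B_o' x_loc )

   where x_ghost is gathered by the forward scatter into a->lvec, and the
   transpose case pushes B_o' x_loc back to its owning processors with a
   reverse scatter.
*/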
1197 #undef __FUNC__
1198 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1199 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1200 {
1201   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1202   int         ierr;
1203 
1204   PetscFunctionBegin;
1205   /* do nondiagonal part */
1206   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1207   /* send it on its way */
1208   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1209   /* do local part */
1210   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1211   /* receive remote parts: note this assumes the values are not actually */
1212   /* inserted in zz until the next line, which is true for this */
1213   /* implementation but may not always be true. */
1214   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 /*
1219   This only works correctly for square matrices where the subblock A->A is the
1220    diagonal block
1221 */
1222 #undef __FUNC__
1223 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1224 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1225 {
1226   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1227   int         ierr;
1228 
1229   PetscFunctionBegin;
1230   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1231   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNC__
1236 #define __FUNC__ "MatScale_MPIBAIJ"
1237 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1238 {
1239   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1240   int         ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1244   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNC__
1249 #define __FUNC__ "MatGetSize_MPIBAIJ"
1250 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1251 {
1252   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1253 
1254   PetscFunctionBegin;
1255   if (m) *m = mat->M;
1256   if (n) *n = mat->N;
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNC__
1261 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1262 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1263 {
1264   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1265 
1266   PetscFunctionBegin;
1267   *m = mat->m; *n = mat->n;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNC__
1272 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1273 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1274 {
1275   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1276 
1277   PetscFunctionBegin;
1278   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1283 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1284 
1285 #undef __FUNC__
1286 #define __FUNC__ "MatGetRow_MPIBAIJ"
1287 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1288 {
1289   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1290   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1291   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1292   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1293   int        *cmap, *idx_p,cstart = mat->cstart;
1294 
1295   PetscFunctionBegin;
1296   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1297   mat->getrowactive = PETSC_TRUE;
1298 
1299   if (!mat->rowvalues && (idx || v)) {
1300     /*
1301         allocate enough space to hold information from the longest row.
1302     */
1303     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1304     int     max = 1,mbs = mat->mbs,tmp;
1305     for ( i=0; i<mbs; i++ ) {
1306       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1307       if (max < tmp) { max = tmp; }
1308     }
1309     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1310     CHKPTRQ(mat->rowvalues);
1311     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1312   }
1313 
1314   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows");
1315   lrow = row - brstart;
1316 
1317   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1318   if (!v)   {pvA = 0; pvB = 0;}
1319   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1320   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1321   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1322   nztot = nzA + nzB;
1323 
1324   cmap  = mat->garray;
1325   if (v  || idx) {
1326     if (nztot) {
1327       /* Sort by increasing column numbers, assuming A and B already sorted */
1328       int imark = -1;
1329       if (v) {
1330         *v = v_p = mat->rowvalues;
1331         for ( i=0; i<nzB; i++ ) {
1332           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1333           else break;
1334         }
1335         imark = i;
1336         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1337         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1338       }
1339       if (idx) {
1340         *idx = idx_p = mat->rowindices;
1341         if (imark > -1) {
1342           for ( i=0; i<imark; i++ ) {
1343             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1344           }
1345         } else {
1346           for ( i=0; i<nzB; i++ ) {
1347             if (cmap[cworkB[i]/bs] < cstart)
1348               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1349             else break;
1350           }
1351           imark = i;
1352         }
1353         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1354         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1355       }
1356     } else {
1357       if (idx) *idx = 0;
1358       if (v)   *v   = 0;
1359     }
1360   }
1361   *nz = nztot;
1362   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1363   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
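/*
     Merge note (illustrative): MatGetRow_MPIBAIJ returns the row with
   globally increasing column numbers by splicing the two sorted pieces as

      [ B columns < cstart*bs | all A columns | B columns >= cend*bs ]

   with imark marking the split point in the B arrays; cmap (= garray) maps
   local block columns of B back to global block columns.
*/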
1367 #undef __FUNC__
1368 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1369 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1370 {
1371   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1372 
1373   PetscFunctionBegin;
1374   if (baij->getrowactive == PETSC_FALSE) {
1375     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1376   }
1377   baij->getrowactive = PETSC_FALSE;
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNC__
1382 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1383 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1384 {
1385   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1386 
1387   PetscFunctionBegin;
1388   *bs = baij->bs;
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNC__
1393 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1394 int MatZeroEntries_MPIBAIJ(Mat A)
1395 {
1396   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1397   int         ierr;
1398 
1399   PetscFunctionBegin;
1400   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1401   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNC__
1406 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1407 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1408 {
1409   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1410   Mat         A = a->A, B = a->B;
1411   int         ierr;
1412   double      isend[5], irecv[5];
1413 
1414   PetscFunctionBegin;
1415   info->block_size     = (double)a->bs;
1416   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1417   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; isend[3] = info->memory; isend[4] = info->mallocs;
1418   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1419   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; isend[3] += info->memory; isend[4] += info->mallocs;
1420   if (flag == MAT_LOCAL) {
1421     info->nz_used      = isend[0];
1422     info->nz_allocated = isend[1];
1423     info->nz_unneeded  = isend[2];
1424     info->memory       = isend[3];
1425     info->mallocs      = isend[4];
1426   } else if (flag == MAT_GLOBAL_MAX) {
1427     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1428     info->nz_used      = irecv[0];
1429     info->nz_allocated = irecv[1];
1430     info->nz_unneeded  = irecv[2];
1431     info->memory       = irecv[3];
1432     info->mallocs      = irecv[4];
1433   } else if (flag == MAT_GLOBAL_SUM) {
1434     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1435     info->nz_used      = irecv[0];
1436     info->nz_allocated = irecv[1];
1437     info->nz_unneeded  = irecv[2];
1438     info->memory       = irecv[3];
1439     info->mallocs      = irecv[4];
1440   }
1441   info->rows_global       = (double)a->M;
1442   info->columns_global    = (double)a->N;
1443   info->rows_local        = (double)a->m;
1444   info->columns_local     = (double)a->n;
1445   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1446   info->fill_ratio_needed = 0;
1447   info->factor_mallocs    = 0;
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNC__
1452 #define __FUNC__ "MatSetOption_MPIBAIJ"
1453 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1454 {
1455   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1456 
1457   PetscFunctionBegin;
1458   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1459       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1460       op == MAT_COLUMNS_UNSORTED ||
1461       op == MAT_COLUMNS_SORTED ||
1462       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1463       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1464         MatSetOption(a->A,op);
1465         MatSetOption(a->B,op);
1466   } else if (op == MAT_ROW_ORIENTED) {
1467         a->roworiented = 1;
1468         MatSetOption(a->A,op);
1469         MatSetOption(a->B,op);
1470   } else if (op == MAT_ROWS_SORTED ||
1471              op == MAT_ROWS_UNSORTED ||
1472              op == MAT_SYMMETRIC ||
1473              op == MAT_STRUCTURALLY_SYMMETRIC ||
1474              op == MAT_YES_NEW_DIAGONALS)
1476     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1477   else if (op == MAT_COLUMN_ORIENTED) {
1478     a->roworiented = 0;
1479     MatSetOption(a->A,op);
1480     MatSetOption(a->B,op);
1481   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1482     a->donotstash = 1;
1483   } else if (op == MAT_NO_NEW_DIAGONALS) {
1484     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1485   } else if (op == MAT_USE_HASH_TABLE) {
1486     a->ht_flag = 1;
1487   } else {
1488     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1489   }
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNC__
1494 #define __FUNC__ "MatTranspose_MPIBAIJ"
1495 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1496 {
1497   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1498   Mat_SeqBAIJ *Aloc;
1499   Mat        B;
1500   int        ierr,M=baij->M,N=baij->N,*ai,*aj,row,i,*rvals,j,k,col;
1501   int        bs=baij->bs,mbs=baij->mbs;
1502   Scalar     *a;
1503 
1504   PetscFunctionBegin;
1505   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1506   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1507   CHKERRQ(ierr);
1508 
1509   /* copy over the A part */
1510   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1511   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1512   row = baij->rstart;
1513   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1514 
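  /* rvals[] will hold the bs consecutive global row numbers spanned by a
     block row; they become the column indices of the transposed entries */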
1515   for ( i=0; i<mbs; i++ ) {
1516     rvals[0] = bs*(baij->rstart + i);
1517     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1518     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1519       col = (baij->cstart+aj[j])*bs;
1520       for (k=0; k<bs; k++ ) {
1521         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1522         col++; a += bs;
1523       }
1524     }
1525   }
1526   /* copy over the B part */
1527   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1528   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1529   row = baij->rstart*bs;
1530   for ( i=0; i<mbs; i++ ) {
1531     rvals[0] = bs*(baij->rstart + i);
1532     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1533     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1534       col = baij->garray[aj[j]]*bs;
1535       for (k=0; k<bs; k++ ) {
1536         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1537         col++; a += bs;
1538       }
1539     }
1540   }
1541   PetscFree(rvals);
1542   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1543   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1544 
1545   if (matout != PETSC_NULL) {
1546     *matout = B;
1547   } else {
1548     PetscOps       *Abops;
1549     struct _MatOps *Aops;
1550 
1551     /* This isn't really an in-place transpose ... but we free the data structures from baij */
1552     PetscFree(baij->rowners);
1553     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1554     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1555     if (baij->colmap) PetscFree(baij->colmap);
1556     if (baij->garray) PetscFree(baij->garray);
1557     if (baij->lvec) VecDestroy(baij->lvec);
1558     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1559     PetscFree(baij);
1560 
1561     /*
1562        This is horrible, horrible code. We need to keep A's
1563       bops and ops pointers but copy everything else over
1564       from B.
1565     */
1566     Abops = A->bops;
1567     Aops  = A->ops;
1568     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1569     A->bops = Abops;
1570     A->ops  = Aops;
1571 
1572     PetscHeaderDestroy(B);
1573   }
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #undef __FUNC__
1578 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1579 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1580 {
1581   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1582   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1583   int ierr,s1,s2,s3;
1584 
1585   PetscFunctionBegin;
1586   if (ll)  {
1587     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1588     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1589     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1590     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1591     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1592   }
1593   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNC__
1598 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1599 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1600 {
1601   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1602   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1603   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1604   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1605   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1606   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1607   MPI_Comm       comm = A->comm;
1608   MPI_Request    *send_waits,*recv_waits;
1609   MPI_Status     recv_status,*send_status;
1610   IS             istmp;
1611 
1612   PetscFunctionBegin;
1613   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1614   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1615 
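  /* overview: each process determines the owner of every row index it
     holds, ships the non-local indices to their owners, and each owner
     then zeros its local rows (reinserting the diagonal afterwards if
     requested) */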
1616   /*  first count number of contributors to each processor */
1617   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1618   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1619   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1620   for ( i=0; i<N; i++ ) {
1621     idx = rows[i];
1622     found = 0;
1623     for ( j=0; j<size; j++ ) {
1624       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1625         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1626       }
1627     }
1628     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1629   }
1630   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1631 
1632   /* inform other processors of number of messages and max length*/
1633   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1634   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1635   nrecvs = work[rank];
1636   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1637   nmax   = work[rank];
1638   PetscFree(work);
1639 
1640   /* post receives:   */
1641   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1642   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1643   for ( i=0; i<nrecvs; i++ ) {
1644     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1645   }
1646 
1647   /* do sends:
1648      1) starts[i] gives the starting index in svalues for stuff going to
1649      the ith processor
1650   */
1651   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1652   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1653   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1654   starts[0] = 0;
1655   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1656   for ( i=0; i<N; i++ ) {
1657     svalues[starts[owner[i]]++] = rows[i];
1658   }
1659   ISRestoreIndices(is,&rows);
1660 
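  /* the insertion loop above advanced each starts[i] to the end of its
     block, so rebuild the offsets (now including the final sentinel) */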
1661   starts[0] = 0;
1662   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1663   count = 0;
1664   for ( i=0; i<size; i++ ) {
1665     if (procs[i]) {
1666       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1667     }
1668   }
1669   PetscFree(starts);
1670 
1671   base = owners[rank]*bs;
1672 
1673   /*  wait on receives */
1674   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1675   source = lens + nrecvs;
1676   count  = nrecvs; slen = 0;
1677   while (count) {
1678     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1679     /* unpack receives into our local space */
1680     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1681     source[imdex]  = recv_status.MPI_SOURCE;
1682     lens[imdex]  = n;
1683     slen += n;
1684     count--;
1685   }
1686   PetscFree(recv_waits);
1687 
1688   /* move the data into the send scatter */
1689   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1690   count = 0;
1691   for ( i=0; i<nrecvs; i++ ) {
1692     values = rvalues + i*nmax;
1693     for ( j=0; j<lens[i]; j++ ) {
1694       lrows[count++] = values[j] - base;
1695     }
1696   }
1697   PetscFree(rvalues); PetscFree(lens);
1698   PetscFree(owner); PetscFree(nprocs);
1699 
1700   /* actually zap the local rows */
1701   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1702   PLogObjectParent(A,istmp);
1703 
1704   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1705   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1706   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1707 
1708   if (diag) {
1709     for ( i = 0; i < slen; i++ ) {
1710       row = lrows[i] + rstart_bs;
1711       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1712     }
1713     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1714     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1715   }
1716   PetscFree(lrows);
1717 
1718   /* wait on sends */
1719   if (nsends) {
1720     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1721     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1722     PetscFree(send_status);
1723   }
1724   PetscFree(send_waits); PetscFree(svalues);
1725 
1726   PetscFunctionReturn(0);
1727 }
1728 extern int MatPrintHelp_SeqBAIJ(Mat);
1729 #undef __FUNC__
1730 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1731 int MatPrintHelp_MPIBAIJ(Mat A)
1732 {
1733   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1734   MPI_Comm    comm = A->comm;
1735   static int  called = 0;
1736   int         ierr;
1737 
1738   PetscFunctionBegin;
1739   if (!a->rank) {
1740     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1741   }
1742   if (called) {PetscFunctionReturn(0);} else called = 1;
1743   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1744   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNC__
1749 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1750 int MatSetUnfactored_MPIBAIJ(Mat A)
1751 {
1752   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1753   int         ierr;
1754 
1755   PetscFunctionBegin;
1756   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
1761 
1762 /* -------------------------------------------------------------------*/
1763 static struct _MatOps MatOps = {
1764   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
1765   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
1766   0,0,0,0,
1767   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
1768   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
1769   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
1770   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
1771   0,0,0,MatGetSize_MPIBAIJ,
1772   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1773   0,0,MatConvertSameType_MPIBAIJ,0,0,
1774   0,0,0,MatGetSubMatrices_MPIBAIJ,
1775   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1776   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1777   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
1778 
1779 
1780 #undef __FUNC__
1781 #define __FUNC__ "MatCreateMPIBAIJ"
1782 /*@C
1783    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1784    (block compressed row).  For good matrix assembly performance
1785    the user should preallocate the matrix storage by setting the parameters
1786    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1787    performance can be increased by more than a factor of 50.
1788 
1789    Input Parameters:
1790 .  comm - MPI communicator
1791 .  bs   - size of block
1792 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1793            This value should be the same as the local size used in creating the
1794            y vector for the matrix-vector product y = Ax.
1795 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1796            This value should be the same as the local size used in creating the
1797            x vector for the matrix-vector product y = Ax.
1798 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1799 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1800 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1801            submatrix  (same for all local rows)
1802 .  d_nnz - array containing the number of block nonzeros in the various block rows
1803            of the diagonal portion of the local submatrix (possibly different for each
1804            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if
1805            it is zero.
1806 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1807            submatrix (same for all local rows).
1808 .  o_nnz - array containing the number of block nonzeros in the various block rows of the
1809            off-diagonal portion of the local submatrix (possibly different for
1810            each block row) or PETSC_NULL.
1811 
1812    Output Parameter:
1813 .  A - the matrix
1814 
1815    Notes:
1816    The user MUST specify either the local or global matrix dimensions
1817    (possibly both).
1818 
1819    Storage Information:
1820    For a square global matrix we define each processor's diagonal portion
1821    to be its local rows and the corresponding columns (a square submatrix);
1822    each processor's off-diagonal portion encompasses the remainder of the
1823    local matrix (a rectangular submatrix).
1824 
1825    The user can specify preallocated storage for the diagonal part of
1826    the local submatrix with either d_nz or d_nnz (not both).  Set
1827    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1828    memory allocation.  Likewise, specify preallocated storage for the
1829    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1830 
1831    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1832    the figure below we depict these three local rows and all columns (0-11).
1833 
1834 $          0 1 2 3 4 5 6 7 8 9 10 11
1835 $         ----------------------------
1836 $  row 3  |  o o o d d d o o o o o o
1837 $  row 4  |  o o o d d d o o o o o o
1838 $  row 5  |  o o o d d d o o o o o o
1839 $         ----------------------------
1840 $
1841 
1842    Thus, any entries in the d locations are stored in the d (diagonal)
1843    submatrix, and any entries in the o locations are stored in the
1844    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1845    stored simply in the MATSEQBAIJ format for compressed row storage.
1846 
1847    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1848    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1849    In general, for PDE problems in which most nonzeros are near the diagonal,
1850    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1851    or you will get TERRIBLE performance; see the users' manual chapter on
1852    matrices.
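
   Example usage:
   A minimal sketch; the block size, local sizes, and preallocation
   counts below are illustrative assumptions, not requirements.

$      Mat A;
$      int ierr, bs = 2, m = 10*bs;   /* ten block rows on this process */
$      /* assume roughly 3 diagonal-block and 1 off-diagonal-block
$         nonzeros per block row */
$      ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DECIDE,
$                              PETSC_DECIDE,3,PETSC_NULL,1,PETSC_NULL,&A);
$      CHKERRQ(ierr);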
1853 
1854 .keywords: matrix, block, aij, compressed row, sparse, parallel
1855 
1856 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
1857 @*/
1858 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1859                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1860 {
1861   Mat          B;
1862   Mat_MPIBAIJ  *b;
1863   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1864 
1865   PetscFunctionBegin;
1866   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1867 
1868   MPI_Comm_size(comm,&size);
1869   if (size == 1) {
1870     if (M == PETSC_DECIDE) M = m;
1871     if (N == PETSC_DECIDE) N = n;
1872     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1873     PetscFunctionReturn(0);
1874   }
1875 
1876   *A = 0;
1877   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
1878   PLogObjectCreate(B);
1879   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
1880   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1881   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1882 
1883   B->destroy    = MatDestroy_MPIBAIJ;
1884   B->view       = MatView_MPIBAIJ;
1885   B->mapping    = 0;
1886   B->factor     = 0;
1887   B->assembled  = PETSC_FALSE;
1888 
1889   B->insertmode = NOT_SET_VALUES;
1890   MPI_Comm_rank(comm,&b->rank);
1891   MPI_Comm_size(comm,&b->size);
1892 
1893   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1894     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1895   }
1896   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1897     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1898   }
1899   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1900     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1901   }
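  /* if both the local and global sizes are given, trust the local sizes
     and recompute the global sizes from them below */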
1902   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1903   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
1904 
1905   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1906     work[0] = m; work[1] = n;
1907     mbs = m/bs; nbs = n/bs;
1908     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1909     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
1910     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
1911   }
1912   if (m == PETSC_DECIDE) {
1913     Mbs = M/bs;
1914     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
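    /* split the Mbs block rows as evenly as possible; the comparison is
       1 on the first (Mbs % size) processes and 0 on the rest */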
1915     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
1916     m   = mbs*bs;
1917   }
1918   if (n == PETSC_DECIDE) {
1919     Nbs = N/bs;
1920     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
1921     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
1922     n   = nbs*bs;
1923   }
1924   if (mbs*bs != m || nbs*bs != n) {
1925     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1926   }
1927 
1928   b->m = m; B->m = m;
1929   b->n = n; B->n = n;
1930   b->N = N; B->N = N;
1931   b->M = M; B->M = M;
1932   b->bs  = bs;
1933   b->bs2 = bs*bs;
1934   b->mbs = mbs;
1935   b->nbs = nbs;
1936   b->Mbs = Mbs;
1937   b->Nbs = Nbs;
1938 
1939   /* build local table of row and column ownerships */
1940   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1941   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1942   b->cowners = b->rowners + b->size + 2;
1943   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1944   b->rowners[0] = 0;
1945   for ( i=2; i<=b->size; i++ ) {
1946     b->rowners[i] += b->rowners[i-1];
1947   }
1948   b->rstart    = b->rowners[b->rank];
1949   b->rend      = b->rowners[b->rank+1];
1950   b->rstart_bs = b->rstart * bs;
1951   b->rend_bs   = b->rend * bs;
1952 
1953   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1954   b->cowners[0] = 0;
1955   for ( i=2; i<=b->size; i++ ) {
1956     b->cowners[i] += b->cowners[i-1];
1957   }
1958   b->cstart    = b->cowners[b->rank];
1959   b->cend      = b->cowners[b->rank+1];
1960   b->cstart_bs = b->cstart * bs;
1961   b->cend_bs   = b->cend * bs;
1962 
1963 
1964   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1965   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1966   PLogObjectParent(B,b->A);
1967   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1968   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1969   PLogObjectParent(B,b->B);
1970 
1971   /* build cache for off array entries formed */
1972   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1973   b->donotstash  = 0;
1974   b->colmap      = 0;
1975   b->garray      = 0;
1976   b->roworiented = 1;
1977 
1978   /* stuff used in block assembly */
1979   b->barray       = 0;
1980 
1981   /* stuff used for matrix vector multiply */
1982   b->lvec         = 0;
1983   b->Mvctx        = 0;
1984 
1985   /* stuff for MatGetRow() */
1986   b->rowindices   = 0;
1987   b->rowvalues    = 0;
1988   b->getrowactive = PETSC_FALSE;
1989 
1990   /* hash table stuff */
1991   b->ht           = 0;
1992   b->hd           = 0;
1993   b->ht_size      = 0;
1994   b->ht_flag      = 0;
1995   b->ht_fact      = 0;
1996   b->ht_total_ct  = 0;
1997   b->ht_insert_ct = 0;
1998 
1999   *A = B;
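
  /* optionally enable hash-table-based assembly; a value given with
     -mat_use_hash_table (taken only if > 1.0) sets the table sizing factor */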
2000   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2001   if (flg) {
2002     double fact = 1.39;
2003     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2004     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2005     if (fact <= 1.0) fact = 1.39;
2006     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2007     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2008   }
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 #undef __FUNC__
2013 #define __FUNC__ "MatConvertSameType_MPIBAIJ"
2014 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
2015 {
2016   Mat         mat;
2017   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2018   int         ierr, len=0, flg;
2019 
2020   PetscFunctionBegin;
2021   *newmat       = 0;
2022   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
2023   PLogObjectCreate(mat);
2024   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2025   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
2026   mat->destroy    = MatDestroy_MPIBAIJ;
2027   mat->view       = MatView_MPIBAIJ;
2028   mat->factor     = matin->factor;
2029   mat->assembled  = PETSC_TRUE;
2030 
2031   a->m = mat->m   = oldmat->m;
2032   a->n = mat->n   = oldmat->n;
2033   a->M = mat->M   = oldmat->M;
2034   a->N = mat->N   = oldmat->N;
2035 
2036   a->bs  = oldmat->bs;
2037   a->bs2 = oldmat->bs2;
2038   a->mbs = oldmat->mbs;
2039   a->nbs = oldmat->nbs;
2040   a->Mbs = oldmat->Mbs;
2041   a->Nbs = oldmat->Nbs;
2042 
2043   a->rstart       = oldmat->rstart;
2044   a->rend         = oldmat->rend;
2045   a->cstart       = oldmat->cstart;
2046   a->cend         = oldmat->cend;
2047   a->size         = oldmat->size;
2048   a->rank         = oldmat->rank;
2049   mat->insertmode = NOT_SET_VALUES;
2050   a->rowvalues    = 0;
2051   a->getrowactive = PETSC_FALSE;
2052   a->barray       = 0;
2053 
2054   /* hash table stuff */
2055   a->ht           = 0;
2056   a->hd           = 0;
2057   a->ht_size      = 0;
2058   a->ht_flag      = oldmat->ht_flag;
2059   a->ht_fact      = oldmat->ht_fact;
2060   a->ht_total_ct  = 0;
2061   a->ht_insert_ct = 0;
2062 
2063 
2064   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2065   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2066   a->cowners = a->rowners + a->size + 2;
2067   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2068   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
2069   if (oldmat->colmap) {
2070     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2071     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2072     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2073   } else a->colmap = 0;
2074   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2075     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2076     PLogObjectMemory(mat,len*sizeof(int));
2077     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2078   } else a->garray = 0;
2079 
2080   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2081   PLogObjectParent(mat,a->lvec);
2082   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2083   PLogObjectParent(mat,a->Mvctx);
2084   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
2085   PLogObjectParent(mat,a->A);
2086   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
2087   PLogObjectParent(mat,a->B);
2088   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2089   if (flg) {
2090     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2091   }
2092   *newmat = mat;
2093   PetscFunctionReturn(0);
2094 }
2095 
2096 #include "sys.h"
2097 
2098 #undef __FUNC__
2099 #define __FUNC__ "MatLoad_MPIBAIJ"
2100 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2101 {
2102   Mat          A;
2103   int          i, nz, ierr, j,rstart, rend, fd;
2104   Scalar       *vals,*buf;
2105   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2106   MPI_Status   status;
2107   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2108   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2109   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,bs2,Mbs,mbs,extra_rows;
2110   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2111   int          dcount,kmax,k,nzcount,tmp;
2112 
2113   PetscFunctionBegin;
2114   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2115   bs2  = bs*bs;
2116 
2117   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2118   if (!rank) {
2119     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2120     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2121     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not a matrix object");
2122     if (header[3] < 0) {
2123       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2124     }
2125   }
2126 
2127   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2128   M = header[1]; N = header[2];
2129 
2130   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2131 
2132   /*
2133      This code adds extra rows to make sure the number of rows is
2134      divisible by the blocksize
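     (for example, with assumed values M = 10 and bs = 4: Mbs = 10/4 = 2,
     extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and the matrix is padded
     to 12 rows; each extra row later receives a 1.0 on its diagonal)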
2135   */
2136   Mbs        = M/bs;
2137   extra_rows = bs - M + bs*(Mbs);
2138   if (extra_rows == bs) extra_rows = 0;
2139   else                  Mbs++;
2140   if (extra_rows && !rank) {
2141     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2142   }
2143 
2144   /* determine ownership of all rows */
2145   mbs = Mbs/size + ((Mbs % size) > rank);
2146   m   = mbs * bs;
2147   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2148   browners = rowners + size + 1;
2149   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2150   rowners[0] = 0;
2151   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2152   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2153   rstart = rowners[rank];
2154   rend   = rowners[rank+1];
2155 
2156   /* distribute row lengths to all processors */
2157   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
2158   if (!rank) {
2159     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2160     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2161     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2162     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2163     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2164     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2165     PetscFree(sndcounts);
2166   } else {
2167     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2168   }
2169 
2170   if (!rank) {
2171     /* calculate the number of nonzeros on each processor */
2172     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2173     PetscMemzero(procsnz,size*sizeof(int));
2174     for ( i=0; i<size; i++ ) {
2175       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2176         procsnz[i] += rowlengths[j];
2177       }
2178     }
2179     PetscFree(rowlengths);
2180 
2181     /* determine max buffer needed and allocate it */
2182     maxnz = 0;
2183     for ( i=0; i<size; i++ ) {
2184       maxnz = PetscMax(maxnz,procsnz[i]);
2185     }
2186     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2187 
2188     /* read in my part of the matrix column indices  */
2189     nz = procsnz[0];
2190     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2191     mycols = ibuf;
2192     if (size == 1)  nz -= extra_rows;
2193     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2194     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2195 
2196     /* read in each remaining processor's part (except the last) and ship it off */
2197     for ( i=1; i<size-1; i++ ) {
2198       nz   = procsnz[i];
2199       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2200       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2201     }
2202     /* read in the stuff for the last proc */
2203     if ( size != 1 ) {
2204       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2205       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2206       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2207       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2208     }
2209     PetscFree(cols);
2210   } else {
2211     /* determine buffer space needed for message */
2212     nz = 0;
2213     for ( i=0; i<m; i++ ) {
2214       nz += locrowlens[i];
2215     }
2216     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2217     mycols = ibuf;
2218     /* receive message of column indices*/
2219     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2220     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2221     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
2222   }
2223 
2224   /* loop over local rows, determining number of off diagonal entries */
2225   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2226   odlens = dlens + (rend-rstart);
2227   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2228   PetscMemzero(mask,3*Mbs*sizeof(int));
2229   masked1 = mask    + Mbs;
2230   masked2 = masked1 + Mbs;
2231   rowcount = 0; nzcount = 0;
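  /* mask[] flags block columns already counted in the current block row;
     masked1[] records those inside the diagonal block-column range
     [rstart,rend), masked2[] the rest, yielding dlens[] and odlens[] */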
2232   for ( i=0; i<mbs; i++ ) {
2233     dcount  = 0;
2234     odcount = 0;
2235     for ( j=0; j<bs; j++ ) {
2236       kmax = locrowlens[rowcount];
2237       for ( k=0; k<kmax; k++ ) {
2238         tmp = mycols[nzcount++]/bs;
2239         if (!mask[tmp]) {
2240           mask[tmp] = 1;
2241           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2242           else masked1[dcount++] = tmp;
2243         }
2244       }
2245       rowcount++;
2246     }
2247 
2248     dlens[i]  = dcount;
2249     odlens[i] = odcount;
2250 
2251     /* zero out the mask elements we set */
2252     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2253     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2254   }
2255 
2256   /* create our matrix */
2257   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2258          CHKERRQ(ierr);
2259   A = *newmat;
2260   MatSetOption(A,MAT_COLUMNS_SORTED);
2261 
2262   if (!rank) {
2263     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
2264     /* read in my part of the matrix numerical values  */
2265     nz = procsnz[0];
2266     vals = buf;
2267     mycols = ibuf;
2268     if (size == 1)  nz -= extra_rows;
2269     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2270     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2271 
2272     /* insert into matrix */
2273     jj      = rstart*bs;
2274     for ( i=0; i<m; i++ ) {
2275       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2276       mycols += locrowlens[i];
2277       vals   += locrowlens[i];
2278       jj++;
2279     }
2280     /* read in other processors (except the last one) and ship out */
2281     for ( i=1; i<size-1; i++ ) {
2282       nz   = procsnz[i];
2283       vals = buf;
2284       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2285       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2286     }
2287     /* the last proc */
2288     if ( size != 1 ) {
2289       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2290       vals = buf;
2291       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2292       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2293       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2294     }
2295     PetscFree(procsnz);
2296   } else {
2297     /* receive numeric values */
2298     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
2299 
2300     /* receive message of values*/
2301     vals   = buf;
2302     mycols = ibuf;
2303     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2304     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2305     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
2306 
2307     /* insert into matrix */
2308     jj      = rstart*bs;
2309     for ( i=0; i<m; i++ ) {
2310       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2311       mycols += locrowlens[i];
2312       vals   += locrowlens[i];
2313       jj++;
2314     }
2315   }
2316   PetscFree(locrowlens);
2317   PetscFree(buf);
2318   PetscFree(ibuf);
2319   PetscFree(rowners);
2320   PetscFree(dlens);
2321   PetscFree(mask);
2322   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2323   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 
2328 
2329 #undef __FUNC__
2330 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2331 /*@
2332    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table employed during matrix assembly.
2333 
2334    Input Parameters:
2335 .  mat  - the matrix
2336 .  fact - factor
2337 
2338    Notes:
2339    This can also be set by the command line option: -mat_use_hash_table <fact>
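
   Example usage (a sketch; the factor 1.6 is an arbitrary illustration):
$     ierr = MatMPIBAIJSetHashTableFactor(mat,1.6); CHKERRQ(ierr);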
2340 
2341 .keywords: matrix, hashtable, factor, HT
2342 
2343 .seealso: MatSetOption()
2344 @*/
2345 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2346 {
2347   Mat_MPIBAIJ *baij;
2348 
2349   PetscFunctionBegin;
2350   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2351   if (mat->type != MATMPIBAIJ) {
2352       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2353   }
2354   baij = (Mat_MPIBAIJ*) mat->data;
2355   baij->ht_fact = fact;
2356   PetscFunctionReturn(0);
2357 }
2358