xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e1311b9049e89cb3452dcd306fde571f4b440ff2)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.113 1998/03/16 18:55:44 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 
15 /*
16      Local utility routine that creates a mapping from the global column
17    number to the local number in the off-diagonal part of the local
18    storage of the matrix.  This is done in a non-scalable way, since the
19    length of colmap equals the number of block columns of the global matrix.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
23 static int CreateColmap_MPIBAIJ_Private(Mat mat)
24 {
25   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
26   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27   int         nbs = B->nbs,i,bs=B->bs;
28 
29   PetscFunctionBegin;
30   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
31   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
32   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
34   PetscFunctionReturn(0);
35 }
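/*
   Illustrative note (a sketch of how colmap is used, based on the loop above):
   with bs = 2 and garray = {3,7}, the loop sets colmap[3] = 1 and colmap[7] = 3,
   i.e. colmap[g] holds 1 + bs*(local block column) of global block column g,
   and colmap[g] == 0 marks a block column with no local off-diagonal entries.
   MatSetValues_MPIBAIJ() below recovers the local point column as
       col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
*/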
36 
37 #define CHUNKSIZE  10
38 
39 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
40 { \
41  \
42     brow = row/bs;  \
43     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
44     rmax = aimax[brow]; nrow = ailen[brow]; \
45       bcol = col/bs; \
46       ridx = row % bs; cidx = col % bs; \
47       low = 0; high = nrow; \
48       while (high-low > 3) { \
49         t = (low+high)/2; \
50         if (rp[t] > bcol) high = t; \
51         else              low  = t; \
52       } \
53       for ( _i=low; _i<high; _i++ ) { \
54         if (rp[_i] > bcol) break; \
55         if (rp[_i] == bcol) { \
56           bap  = ap +  bs2*_i + bs*cidx + ridx; \
57           if (addv == ADD_VALUES) *bap += value;  \
58           else                    *bap  = value;  \
59           goto a_noinsert; \
60         } \
61       } \
62       if (a->nonew == 1) goto a_noinsert; \
63       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
64       if (nrow >= rmax) { \
65         /* there is no extra room in row, therefore enlarge */ \
66         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
67         Scalar *new_a; \
68  \
69         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
70  \
71         /* malloc new storage space */ \
72         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
73         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
74         new_j   = (int *) (new_a + bs2*new_nz); \
75         new_i   = new_j + new_nz; \
76  \
77         /* copy over old data into new slots */ \
78         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
79         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
80         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
81         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
82         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
83                                                            len*sizeof(int)); \
84         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
85         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
86         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
87                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
88         /* free up old matrix storage */ \
89         PetscFree(a->a);  \
90         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
91         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
92         a->singlemalloc = 1; \
93  \
94         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
96         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
97         a->maxnz += bs2*CHUNKSIZE; \
98         a->reallocs++; \
99         a->nz++; \
100       } \
101       N = nrow++ - 1;  \
102       /* shift up all the later entries in this row */ \
103       for ( ii=N; ii>=_i; ii-- ) { \
104         rp[ii+1] = rp[ii]; \
105         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
106       } \
107       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
108       rp[_i]                      = bcol;  \
109       ap[bs2*_i + bs*cidx + ridx] = value;  \
110       a_noinsert:; \
111     ailen[brow] = nrow; \
112 }
113 
114 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
115 { \
116  \
117     brow = row/bs;  \
118     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
119     rmax = bimax[brow]; nrow = bilen[brow]; \
120       bcol = col/bs; \
121       ridx = row % bs; cidx = col % bs; \
122       low = 0; high = nrow; \
123       while (high-low > 3) { \
124         t = (low+high)/2; \
125         if (rp[t] > bcol) high = t; \
126         else              low  = t; \
127       } \
128       for ( _i=low; _i<high; _i++ ) { \
129         if (rp[_i] > bcol) break; \
130         if (rp[_i] == bcol) { \
131           bap  = ap +  bs2*_i + bs*cidx + ridx; \
132           if (addv == ADD_VALUES) *bap += value;  \
133           else                    *bap  = value;  \
134           goto b_noinsert; \
135         } \
136       } \
137       if (b->nonew == 1) goto b_noinsert; \
138       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
139       if (nrow >= rmax) { \
140         /* there is no extra room in row, therefore enlarge */ \
141         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
142         Scalar *new_a; \
143  \
144         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
145  \
146         /* malloc new storage space */ \
147         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
148         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
149         new_j   = (int *) (new_a + bs2*new_nz); \
150         new_i   = new_j + new_nz; \
151  \
152         /* copy over old data into new slots */ \
153         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
154         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
156         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
157         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
158                                                            len*sizeof(int)); \
159         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
160         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
161         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
162                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
163         /* free up old matrix storage */ \
164         PetscFree(b->a);  \
165         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
166         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
167         b->singlemalloc = 1; \
168  \
169         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
170         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
171         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
172         b->maxnz += bs2*CHUNKSIZE; \
173         b->reallocs++; \
174         b->nz++; \
175       } \
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for ( ii=N; ii>=_i; ii-- ) { \
179         rp[ii+1] = rp[ii]; \
180         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
181       } \
182       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
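/*
   Note on the two macros above: each locates block column col/bs within block row
   row/bs by a short binary search followed by a linear scan, and stores the scalar
   at ap[bs2*_i + bs*cidx + ridx], i.e. each bs x bs block is kept in column-major
   order (cidx = col%bs selects the column, ridx = row%bs the row within the block).
   If the row has no free slot (nrow >= rmax), the row/column/value arrays are
   reallocated with CHUNKSIZE extra block slots for that row before inserting.
*/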
188 
189 #undef __FUNC__
190 #define __FUNC__ "MatSetValues_MPIBAIJ"
191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
192 {
193   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
194   Scalar      value;
195   int         ierr,i,j,row,col;
196   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
197   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
198   int         cend_orig=baij->cend_bs,bs=baij->bs;
199 
200   /* Some variables required by the macros defined above */
201   Mat         A = baij->A;
202   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
203   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204   Scalar      *aa=a->a;
205 
206   Mat         B = baij->B;
207   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
208   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209   Scalar      *ba=b->a;
210 
211   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
212   int         low,high,t,ridx,cidx,bs2=a->bs2;
213   Scalar      *ap,*bap;
214 
215   PetscFunctionBegin;
216   for ( i=0; i<m; i++ ) {
217 #if defined(USE_PETSC_BOPT_g)
218     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
219     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220 #endif
221     if (im[i] >= rstart_orig && im[i] < rend_orig) {
222       row = im[i] - rstart_orig;
223       for ( j=0; j<n; j++ ) {
224         if (in[j] >= cstart_orig && in[j] < cend_orig){
225           col = in[j] - cstart_orig;
226           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         }
230 #if defined(USE_PETSC_BOPT_g)
231         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
232         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
240             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
241               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
244               B = baij->B;
245               b = (Mat_SeqBAIJ *) (B)->data;
246               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
247               ba=b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     } else {
256       if (roworiented && !baij->donotstash) {
257         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
258       } else {
259         if (!baij->donotstash) {
260           row = im[i];
261 	  for ( j=0; j<n; j++ ) {
262 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
263           }
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272 #undef __FUNC__
273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275 {
276   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
277   Scalar      *value,*barray=baij->barray;
278   int         ierr,i,j,ii,jj,row,col,k,l;
279   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280   int         rend=baij->rend,cstart=baij->cstart,stepval;
281   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282   PetscFunctionBegin;
283   if (!barray) {
284     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
285   }
286 
287   if (roworiented) {
288     stepval = (n-1)*bs;
289   } else {
290     stepval = (m-1)*bs;
291   }
292   for ( i=0; i<m; i++ ) {
293 #if defined(USE_PETSC_BOPT_g)
294     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296 #endif
297     if (im[i] >= rstart && im[i] < rend) {
298       row = im[i] - rstart;
299       for ( j=0; j<n; j++ ) {
300         /* If only one block column (row oriented) or one block row (column oriented) is passed, no copy is required */
301         if ((roworiented) && (n == 1)) {
302           barray = v + i*bs2;
303         } else if((!roworiented) && (m == 1)) {
304           barray = v + j*bs2;
305         } else { /* Here a copy is required */
306           if (roworiented) {
307             value = v + i*(stepval+bs)*bs + j*bs;
308           } else {
309             value = v + j*(stepval+bs)*bs + i*bs;
310           }
311           for ( ii=0; ii<bs; ii++,value+=stepval ) {
312             for (jj=0; jj<bs; jj++ ) {
313               *barray++  = *value++;
314             }
315           }
316           barray -=bs2;
317         }
318 
319         if (in[j] >= cstart && in[j] < cend){
320           col  = in[j] - cstart;
321           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322         }
323 #if defined(USE_PETSC_BOPT_g)
324         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326 #endif
327         else {
328           if (mat->was_assembled) {
329             if (!baij->colmap) {
330               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331             }
332 
333 #if defined(USE_PETSC_BOPT_g)
334             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335 #endif
336             col = (baij->colmap[in[j]] - 1)/bs;
337             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339               col =  in[j];
340             }
341           }
342           else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (!baij->donotstash) {
348         if (roworiented ) {
349           row   = im[i]*bs;
350           value = v + i*(stepval+bs)*bs;
351           for ( j=0; j<bs; j++,row++ ) {
352             for ( k=0; k<n; k++ ) {
353               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355               }
356             }
357           }
358         } else {
359           for ( j=0; j<n; j++ ) {
360             value = v + j*(stepval+bs)*bs + i*bs;
361             col   = in[j]*bs;
362             for ( k=0; k<bs; k++,col++,value+=stepval) {
363               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365               }
366             }
367           }
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 #include <math.h>
375 #define HASH_KEY 0.6180339887
376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
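/*
   HASH is a multiplicative hash: the key is scaled by the golden-ratio constant
   HASH_KEY, only the fractional part is kept, and the result is scaled up to the
   table size.  For example, with size = 100 and key = 25:
       tmp = 25*0.6180339887 = 15.4508...,  HASH = (int)(100*0.4508...) = 45
*/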
379 #undef __FUNC__
380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
382 {
383   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
384   int         ierr,i,j,row,col;
385   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
386   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
387   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
388   double      tmp;
389   Scalar      ** HD = baij->hd,value;
390 #if defined(USE_PETSC_BOPT_g)
391   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
392 #endif
393 
394   PetscFunctionBegin;
395 
396   for ( i=0; i<m; i++ ) {
397 #if defined(USE_PETSC_BOPT_g)
398     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
399     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
400 #endif
401     row = im[i];
402     if (row >= rstart_orig && row < rend_orig) {
403       for ( j=0; j<n; j++ ) {
404         col = in[j];
405         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
406         /* Look up in the hash table */
407         key = (row/bs)*Nbs+(col/bs)+1;
408         h1  = HASH(size,key,tmp);
409 
410 
411         idx = h1;
412 #if defined(USE_PETSC_BOPT_g)
413         insert_ct++;
414         total_ct++;
415         if (HT[idx] != key) {
416           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
417           if (idx == size) {
418             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
419             if (idx == h1) {
420               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
421             }
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
427           if (idx == size) {
428             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
429             if (idx == h1) {
430               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
431             }
432           }
433         }
434 #endif
435         /* A HASH table entry is found, so insert the values at the correct address */
436         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
438       }
439     } else {
440       if (roworiented && !baij->donotstash) {
441         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
442       } else {
443         if (!baij->donotstash) {
444           row = im[i];
445 	  for ( j=0; j<n; j++ ) {
446 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
447           }
448         }
449       }
450     }
451   }
452 #if defined(USE_PETSC_BOPT_g)
453   baij->ht_total_ct = total_ct;
454   baij->ht_insert_ct = insert_ct;
455 #endif
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNC__
460 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
461 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
462 {
463   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
464   int         ierr,i,j,ii,jj,row,col,k,l;
465   int         roworiented = baij->roworiented,rstart=baij->rstart ;
466   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
467   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
468   double      tmp;
469   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
470 #if defined(USE_PETSC_BOPT_g)
471   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
472 #endif
473 
474   PetscFunctionBegin;
475 
476   if (roworiented) {
477     stepval = (n-1)*bs;
478   } else {
479     stepval = (m-1)*bs;
480   }
481   for ( i=0; i<m; i++ ) {
482 #if defined(USE_PETSC_BOPT_g)
483     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
484     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
485 #endif
486     row   = im[i];
487     v_t   = v + i*bs2;
488     if (row >= rstart && row < rend) {
489       for ( j=0; j<n; j++ ) {
490         col = in[j];
491 
492         /* Look up in the hash table */
493         key = row*Nbs+col+1;
494         h1  = HASH(size,key,tmp);
495 
496         idx = h1;
497 #if defined(USE_PETSC_BOPT_g)
498         total_ct++;
499         insert_ct++;
500         if (HT[idx] != key) {
501           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
502           if (idx == size) {
503             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
504             if (idx == h1) {
505               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
506             }
507           }
508         }
509 #else
510         if (HT[idx] != key) {
511           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
512           if (idx == size) {
513             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
514             if (idx == h1) {
515               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
516             }
517           }
518         }
519 #endif
520         baij_a = HD[idx];
521         if (roworiented) {
522           /*value = v + i*(stepval+bs)*bs + j*bs;*/
523           /* value = v + (i*(stepval+bs)+j)*bs; */
524           value = v_t;
525           v_t  += bs;
526           if (addv == ADD_VALUES) {
527             for ( ii=0; ii<bs; ii++,value+=stepval) {
528               for ( jj=ii; jj<bs2; jj+=bs ) {
529                 baij_a[jj]  += *value++;
530               }
531             }
532           } else {
533             for ( ii=0; ii<bs; ii++,value+=stepval) {
534               for ( jj=ii; jj<bs2; jj+=bs ) {
535                 baij_a[jj]  = *value++;
536               }
537             }
538           }
539         } else {
540           value = v + j*(stepval+bs)*bs + i*bs;
541           if (addv == ADD_VALUES) {
542             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
543               for ( jj=0; jj<bs; jj++ ) {
544                 baij_a[jj]  += *value++;
545               }
546             }
547           } else {
548             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
549               for ( jj=0; jj<bs; jj++ ) {
550                 baij_a[jj]  = *value++;
551               }
552             }
553           }
554         }
555       }
556     } else {
557       if (!baij->donotstash) {
558         if (roworiented ) {
559           row   = im[i]*bs;
560           value = v + i*(stepval+bs)*bs;
561           for ( j=0; j<bs; j++,row++ ) {
562             for ( k=0; k<n; k++ ) {
563               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
564                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
565               }
566             }
567           }
568         } else {
569           for ( j=0; j<n; j++ ) {
570             value = v + j*(stepval+bs)*bs + i*bs;
571             col   = in[j]*bs;
572             for ( k=0; k<bs; k++,col++,value+=stepval) {
573               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
574                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
575               }
576             }
577           }
578         }
579       }
580     }
581   }
582 #if defined(USE_PETSC_BOPT_g)
583   baij->ht_total_ct = total_ct;
584   baij->ht_insert_ct = insert_ct;
585 #endif
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNC__
590 #define __FUNC__ "MatGetValues_MPIBAIJ"
591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
592 {
593   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
594   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
595   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
596 
597   PetscFunctionBegin;
598   for ( i=0; i<m; i++ ) {
599     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
600     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
601     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
602       row = idxm[i] - bsrstart;
603       for ( j=0; j<n; j++ ) {
604         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
605         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
606         if (idxn[j] >= bscstart && idxn[j] < bscend){
607           col = idxn[j] - bscstart;
608           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
609         } else {
610           if (!baij->colmap) {
611             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
612           }
613           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
614              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
615           else {
616             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
617             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
618           }
619         }
620       }
621     } else {
622       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
623     }
624   }
625   PetscFunctionReturn(0);
626 }
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatNorm_MPIBAIJ"
630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
631 {
632   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
633   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
634   int        ierr, i,bs2=baij->bs2;
635   double     sum = 0.0;
636   Scalar     *v;
637 
638   PetscFunctionBegin;
639   if (baij->size == 1) {
640     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
641   } else {
642     if (type == NORM_FROBENIUS) {
643       v = amat->a;
644       for (i=0; i<amat->nz*bs2; i++ ) {
645 #if defined(USE_PETSC_COMPLEX)
646         sum += real(conj(*v)*(*v)); v++;
647 #else
648         sum += (*v)*(*v); v++;
649 #endif
650       }
651       v = bmat->a;
652       for (i=0; i<bmat->nz*bs2; i++ ) {
653 #if defined(USE_PETSC_COMPLEX)
654         sum += real(conj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
660       *norm = sqrt(*norm);
661     } else {
662       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
663     }
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
670 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
673   MPI_Comm    comm = mat->comm;
674   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
675   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
676   MPI_Request *send_waits,*recv_waits;
677   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
678   InsertMode  addv;
679   Scalar      *rvalues,*svalues;
680 
681   PetscFunctionBegin;
682   /* make sure all processors are either in INSERTMODE or ADDMODE */
683   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
684   if (addv == (ADD_VALUES|INSERT_VALUES)) {
685     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
686   }
687   mat->insertmode = addv; /* in case this processor had no cache */
688 
689   /*  first count number of contributors to each processor */
690   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
691   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
692   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
693   for ( i=0; i<baij->stash.n; i++ ) {
694     idx = baij->stash.idx[i];
695     for ( j=0; j<size; j++ ) {
696       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
697         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
698       }
699     }
700   }
701   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
702 
703   /* inform other processors of number of messages and max length*/
704   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
705   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
706   nreceives = work[rank];
707   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
708   nmax      = work[rank];
709   PetscFree(work);
710 
711   /* post receives:
712        1) each message consists of ordered triples
713      (global row index, global column index, value); the indices are stored
714      as Scalars to simplify the message passing.
715        2) since we don't know how long each individual message is we
716      allocate the largest needed buffer for each receive. Potentially
717      this is a lot of wasted space.
718 
719 
720        This could be done better.
721   */
722   rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
723   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
724   for ( i=0; i<nreceives; i++ ) {
725     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
726   }
727 
728   /* do sends:
729       1) starts[i] gives the starting index in svalues for stuff going to
730          the ith processor
731   */
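  /*
      Example of the packing below: a stashed entry for global row 5, global
      column 7 with value 3.0 occupies three consecutive Scalars in svalues,
      namely 5.0, 7.0, 3.0.
  */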
732   svalues = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
733   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
734   starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
735   starts[0] = 0;
736   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
737   for ( i=0; i<baij->stash.n; i++ ) {
738     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
739     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
740     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
741   }
742   PetscFree(owner);
743   starts[0] = 0;
744   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
745   count = 0;
746   for ( i=0; i<size; i++ ) {
747     if (procs[i]) {
748       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
749     }
750   }
751   PetscFree(starts); PetscFree(nprocs);
752 
753   /* Free cache space */
754   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
755   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
756 
757   baij->svalues    = svalues;    baij->rvalues    = rvalues;
758   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
759   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
760   baij->rmax       = nmax;
761 
762   PetscFunctionReturn(0);
763 }
764 
765 /*
766   Creates the hash table and fills in its entries.
767   This table is created only once.
768   If new entries need to be added to the matrix
769   then the hash table has to be destroyed and
770   recreated.
771 */
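/*
   Sketch of the scheme implemented below: every nonzero block (row,col) of the
   assembled matrix is entered under key = row*Nbs + col + 1 (the +1 keeps keys
   nonzero so HT[i] == 0 can mark an empty slot).  Collisions are resolved by
   linear probing starting at HASH(size,key,tmp) and wrapping modulo the table
   size; HD[i] stores the address of the corresponding bs2 block of values.
*/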
772 #undef __FUNC__
773 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
774 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
775 {
776   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
777   Mat         A = baij->A, B=baij->B;
778   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
779   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
780   int         size,bs2=baij->bs2,rstart=baij->rstart;
781   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
782   int         *HT,key;
783   Scalar      **HD;
784   double      tmp;
785 #if defined(USE_PETSC_BOPT_g)
786   int         ct=0,max=0;
787 #endif
788 
789   PetscFunctionBegin;
790   baij->ht_size=(int)(factor*nz);
791   size = baij->ht_size;
792 
793   if (baij->ht) {
794     PetscFunctionReturn(0);
795   }
796 
797   /* Allocate Memory for Hash Table */
798   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
799   baij->ht = (int*)(baij->hd + size);
800   HD = baij->hd;
801   HT = baij->ht;
802 
803 
804   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
805 
806 
807   /* Loop Over A */
808   for ( i=0; i<a->mbs; i++ ) {
809     for ( j=ai[i]; j<ai[i+1]; j++ ) {
810       row = i+rstart;
811       col = aj[j]+cstart;
812 
813       key = row*Nbs + col + 1;
814       h1  = HASH(size,key,tmp);
815       for ( k=0; k<size; k++ ){
816         if (HT[(h1+k)%size] == 0) {
817           HT[(h1+k)%size] = key;
818           HD[(h1+k)%size] = a->a + j*bs2;
819           break;
820 #if defined(USE_PETSC_BOPT_g)
821         } else {
822           ct++;
823 #endif
824         }
825       }
826 #if defined(USE_PETSC_BOPT_g)
827       if (k> max) max = k;
828 #endif
829     }
830   }
831   /* Loop Over B */
832   for ( i=0; i<b->mbs; i++ ) {
833     for ( j=bi[i]; j<bi[i+1]; j++ ) {
834       row = i+rstart;
835       col = garray[bj[j]];
836       key = row*Nbs + col + 1;
837       h1  = HASH(size,key,tmp);
838       for ( k=0; k<size; k++ ){
839         if (HT[(h1+k)%size] == 0) {
840           HT[(h1+k)%size] = key;
841           HD[(h1+k)%size] = b->a + j*bs2;
842           break;
843 #if defined(USE_PETSC_BOPT_g)
844         } else {
845           ct++;
846 #endif
847         }
848       }
849 #if defined(USE_PETSC_BOPT_g)
850       if (k> max) max = k;
851 #endif
852     }
853   }
854 
855   /* Print Summary */
856 #if defined(USE_PETSC_BOPT_g)
857   for ( i=0,j=0; i<size; i++)
858     if (HT[i]) {j++;}
859   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
860            (j== 0)? 0.0:((double)(ct+j))/j,max);
861 #endif
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNC__
866 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
867 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
868 {
869   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
870   MPI_Status  *send_status,recv_status;
871   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
872   int         bs=baij->bs,row,col,other_disassembled;
873   Scalar      *values,val;
874   InsertMode  addv = mat->insertmode;
875 
876   PetscFunctionBegin;
877   /*  wait on receives */
878   while (count) {
879     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
880     /* unpack receives into our local space */
881     values = baij->rvalues + 3*imdex*baij->rmax;
882     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
883     n = n/3;
884     for ( i=0; i<n; i++ ) {
885       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
886       col = (int) PetscReal(values[3*i+1]);
887       val = values[3*i+2];
888       if (col >= baij->cstart*bs && col < baij->cend*bs) {
889         col -= baij->cstart*bs;
890         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
891       } else {
892         if (mat->was_assembled) {
893           if (!baij->colmap) {
894             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
895           }
896           col = (baij->colmap[col/bs]) - 1 + col%bs;
897           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
898             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
899             col = (int) PetscReal(values[3*i+1]);
900           }
901         }
902         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
903       }
904     }
905     count--;
906   }
907   PetscFree(baij->recv_waits); PetscFree(baij->rvalues);
908 
909   /* wait on sends */
910   if (baij->nsends) {
911     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
912     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
913     PetscFree(send_status);
914   }
915   PetscFree(baij->send_waits); PetscFree(baij->svalues);
916 
917   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
918   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
919 
920   /* determine if any processor has disassembled, if so we must
921      also disassemble ourselves, in order that we may reassemble. */
922   ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
923   if (mat->was_assembled && !other_disassembled) {
924     ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
925   }
926 
927   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
928     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
929   }
930   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
932 
933 #if defined(USE_PETSC_BOPT_g)
934   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
935     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
936              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
937     baij->ht_total_ct  = 0;
938     baij->ht_insert_ct = 0;
939   }
940 #endif
941   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
942     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
943     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
944     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
945   }
946 
947   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNC__
952 #define __FUNC__ "MatView_MPIBAIJ_Binary"
953 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
954 {
955   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
956   int          ierr;
957 
958   PetscFunctionBegin;
959   if (baij->size == 1) {
960     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
961   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNC__
966 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
967 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
968 {
969   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
970   int          ierr, format,rank,bs = baij->bs;
971   FILE         *fd;
972   ViewerType   vtype;
973 
974   PetscFunctionBegin;
975   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
976   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
977     ierr = ViewerGetFormat(viewer,&format);
978     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
979       MatInfo info;
980       MPI_Comm_rank(mat->comm,&rank);
981       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
982       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
983       PetscSequentialPhaseBegin(mat->comm,1);
984       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
985               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
986               baij->bs,(int)info.memory);
987       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
988       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
989       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
990       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
991       fflush(fd);
992       PetscSequentialPhaseEnd(mat->comm,1);
993       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
994       PetscFunctionReturn(0);
995     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
996       PetscPrintf(mat->comm,"  block size is %d\n",bs);
997       PetscFunctionReturn(0);
998     }
999   }
1000 
1001   if (vtype == DRAW_VIEWER) {
1002     Draw       draw;
1003     PetscTruth isnull;
1004     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
1005     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1006   }
1007 
1008   if (vtype == ASCII_FILE_VIEWER) {
1009     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1010     PetscSequentialPhaseBegin(mat->comm,1);
1011     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
1012            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
1013             baij->cstart*bs,baij->cend*bs);
1014     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1015     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
1016     fflush(fd);
1017     PetscSequentialPhaseEnd(mat->comm,1);
1018   } else {
1019     int size = baij->size;
1020     rank = baij->rank;
1021     if (size == 1) {
1022       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1023     } else {
1024       /* assemble the entire matrix onto first processor. */
1025       Mat         A;
1026       Mat_SeqBAIJ *Aloc;
1027       int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
1028       int         mbs=baij->mbs;
1029       Scalar      *a;
1030 
1031       if (!rank) {
1032         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1033       } else {
1034         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1035       }
1036       PLogObjectParent(mat,A);
1037 
1038       /* copy over the A part */
1039       Aloc = (Mat_SeqBAIJ*) baij->A->data;
1040       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1041       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1042 
1043       for ( i=0; i<mbs; i++ ) {
1044         rvals[0] = bs*(baij->rstart + i);
1045         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1046         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1047           col = (baij->cstart+aj[j])*bs;
1048           for (k=0; k<bs; k++ ) {
1049             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1050             col++; a += bs;
1051           }
1052         }
1053       }
1054       /* copy over the B part */
1055       Aloc = (Mat_SeqBAIJ*) baij->B->data;
1056       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1057       for ( i=0; i<mbs; i++ ) {
1058         rvals[0] = bs*(baij->rstart + i);
1059         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1060         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1061           col = baij->garray[aj[j]]*bs;
1062           for (k=0; k<bs; k++ ) {
1063             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1064             col++; a += bs;
1065           }
1066         }
1067       }
1068       PetscFree(rvals);
1069       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1070       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1071       /*
1072          Everyone has to call to draw the matrix since the graphics waits are
1073          synchronized across all processors that share the Draw object
1074       */
1075       if (!rank || vtype == DRAW_VIEWER) {
1076         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1077       }
1078       ierr = MatDestroy(A); CHKERRQ(ierr);
1079     }
1080   }
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 
1085 
1086 #undef __FUNC__
1087 #define __FUNC__ "MatView_MPIBAIJ"
1088 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1089 {
1090   int         ierr;
1091   ViewerType  vtype;
1092 
1093   PetscFunctionBegin;
1094   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1095   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1096       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1097     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1098   } else if (vtype == BINARY_FILE_VIEWER) {
1099     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1100   }
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNC__
1105 #define __FUNC__ "MatDestroy_MPIBAIJ"
1106 int MatDestroy_MPIBAIJ(Mat mat)
1107 {
1108   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1109   int         ierr;
1110 
1111   PetscFunctionBegin;
1112 #if defined(USE_PETSC_LOG)
1113   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1114 #endif
1115 
1116   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1117   PetscFree(baij->rowners);
1118   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1119   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1120   if (baij->colmap) PetscFree(baij->colmap);
1121   if (baij->garray) PetscFree(baij->garray);
1122   if (baij->lvec)   VecDestroy(baij->lvec);
1123   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1124   if (baij->rowvalues) PetscFree(baij->rowvalues);
1125   if (baij->barray) PetscFree(baij->barray);
1126   if (baij->hd) PetscFree(baij->hd);
1127   PetscFree(baij);
1128   PLogObjectDestroy(mat);
1129   PetscHeaderDestroy(mat);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNC__
1134 #define __FUNC__ "MatMult_MPIBAIJ"
1135 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1136 {
1137   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1138   int         ierr, nt;
1139 
1140   PetscFunctionBegin;
1141   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1142   if (nt != a->n) {
1143     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1144   }
1145   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1146   if (nt != a->m) {
1147     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
1148   }
1149   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1150   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1151   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1152   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1153   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNC__
1158 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1159 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1160 {
1161   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1162   int        ierr;
1163 
1164   PetscFunctionBegin;
1165   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1166   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1167   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1168   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNC__
1173 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1174 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1175 {
1176   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1177   int         ierr;
1178 
1179   PetscFunctionBegin;
1180   /* do nondiagonal part */
1181   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1182   /* send it on its way */
1183   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1184   /* do local part */
1185   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1186   /* receive remote parts: note this assumes the values are not actually */
1187   /* inserted into yy until the next line, which is true for this implementation */
1188   /* but may not always be true. */
1189   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNC__
1194 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1195 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1196 {
1197   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1198   int         ierr;
1199 
1200   PetscFunctionBegin;
1201   /* do nondiagonal part */
1202   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1203   /* send it on its way */
1204   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1205   /* do local part */
1206   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1207   /* receive remote parts: note this assumes the values are not actually */
1208   /* inserted into zz until the next line, which is true for this implementation */
1209   /* but may not always be true. */
1210   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 /*
1215   This only works correctly for square matrices where the subblock A->A is the
1216    diagonal block
1217 */
1218 #undef __FUNC__
1219 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1220 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1221 {
1222   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1223   int         ierr;
1224 
1225   PetscFunctionBegin;
1226   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1227   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNC__
1232 #define __FUNC__ "MatScale_MPIBAIJ"
1233 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1234 {
1235   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1236   int         ierr;
1237 
1238   PetscFunctionBegin;
1239   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1240   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNC__
1245 #define __FUNC__ "MatGetSize_MPIBAIJ"
1246 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1247 {
1248   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1249 
1250   PetscFunctionBegin;
1251   if (m) *m = mat->M;
1252   if (n) *n = mat->N;
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNC__
1257 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1258 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1259 {
1260   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1261 
1262   PetscFunctionBegin;
1263   *m = mat->m; *n = mat->n;
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNC__
1268 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1269 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1270 {
1271   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1272 
1273   PetscFunctionBegin;
1274   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1279 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1280 
1281 #undef __FUNC__
1282 #define __FUNC__ "MatGetRow_MPIBAIJ"
1283 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1284 {
1285   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1286   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1287   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1288   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1289   int        *cmap, *idx_p,cstart = mat->cstart;
1290 
1291   PetscFunctionBegin;
1292   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1293   mat->getrowactive = PETSC_TRUE;
1294 
1295   if (!mat->rowvalues && (idx || v)) {
1296     /*
1297         allocate enough space to hold information from the longest row.
1298     */
1299     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1300     int     max = 1,mbs = mat->mbs,tmp;
1301     for ( i=0; i<mbs; i++ ) {
1302       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1303       if (max < tmp) { max = tmp; }
1304     }
1305     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1306     CHKPTRQ(mat->rowvalues);
1307     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1308   }
1309 
1310   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1311   lrow = row - brstart;
1312 
1313   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1314   if (!v)   {pvA = 0; pvB = 0;}
1315   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1316   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1317   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1318   nztot = nzA + nzB;
1319 
1320   cmap  = mat->garray;
1321   if (v  || idx) {
1322     if (nztot) {
1323       /* Sort by increasing column numbers, assuming A and B already sorted */
1324       int imark = -1;
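      /* imark counts the leading B (off-diagonal) entries whose global block
         column lies below cstart; those come first, then all A entries, then
         the remaining B entries, giving globally increasing column order. */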
1325       if (v) {
1326         *v = v_p = mat->rowvalues;
1327         for ( i=0; i<nzB; i++ ) {
1328           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1329           else break;
1330         }
1331         imark = i;
1332         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1333         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1334       }
1335       if (idx) {
1336         *idx = idx_p = mat->rowindices;
1337         if (imark > -1) {
1338           for ( i=0; i<imark; i++ ) {
1339             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1340           }
1341         } else {
1342           for ( i=0; i<nzB; i++ ) {
1343             if (cmap[cworkB[i]/bs] < cstart)
1344               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1345             else break;
1346           }
1347           imark = i;
1348         }
1349         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1350         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1351       }
1352     } else {
1353       if (idx) *idx = 0;
1354       if (v)   *v   = 0;
1355     }
1356   }
1357   *nz = nztot;
1358   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1359   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNC__
1364 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1365 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1366 {
1367   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1368 
1369   PetscFunctionBegin;
1370   if (baij->getrowactive == PETSC_FALSE) {
1371     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1372   }
1373   baij->getrowactive = PETSC_FALSE;
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNC__
1378 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1379 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1380 {
1381   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1382 
1383   PetscFunctionBegin;
1384   *bs = baij->bs;
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNC__
1389 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1390 int MatZeroEntries_MPIBAIJ(Mat A)
1391 {
1392   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1393   int         ierr;
1394 
1395   PetscFunctionBegin;
1396   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1397   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNC__
1402 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1403 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1404 {
1405   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1406   Mat         A = a->A, B = a->B;
1407   int         ierr;
1408   double      isend[5], irecv[5];
1409 
1410   PetscFunctionBegin;
1411   info->block_size     = (double)a->bs;
1412   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1413   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; isend[3] = info->memory; isend[4] = info->mallocs;
1414   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1415   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; isend[3] += info->memory; isend[4] += info->mallocs;
1416   if (flag == MAT_LOCAL) {
1417     info->nz_used      = isend[0];
1418     info->nz_allocated = isend[1];
1419     info->nz_unneeded  = isend[2];
1420     info->memory       = isend[3];
1421     info->mallocs      = isend[4];
1422   } else if (flag == MAT_GLOBAL_MAX) {
1423     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1424     info->nz_used      = irecv[0];
1425     info->nz_allocated = irecv[1];
1426     info->nz_unneeded  = irecv[2];
1427     info->memory       = irecv[3];
1428     info->mallocs      = irecv[4];
1429   } else if (flag == MAT_GLOBAL_SUM) {
1430     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1431     info->nz_used      = irecv[0];
1432     info->nz_allocated = irecv[1];
1433     info->nz_unneeded  = irecv[2];
1434     info->memory       = irecv[3];
1435     info->mallocs      = irecv[4];
1436   }
1437   info->rows_global       = (double)a->M;
1438   info->columns_global    = (double)a->N;
1439   info->rows_local        = (double)a->m;
1440   info->columns_local     = (double)a->n;
1441   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1442   info->fill_ratio_needed = 0;
1443   info->factor_mallocs    = 0;
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNC__
1448 #define __FUNC__ "MatSetOption_MPIBAIJ"
1449 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1450 {
1451   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1452 
1453   PetscFunctionBegin;
1454   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1455       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1456       op == MAT_COLUMNS_UNSORTED ||
1457       op == MAT_COLUMNS_SORTED ||
1458       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1459       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1460         MatSetOption(a->A,op);
1461         MatSetOption(a->B,op);
1462   } else if (op == MAT_ROW_ORIENTED) {
1463         a->roworiented = 1;
1464         MatSetOption(a->A,op);
1465         MatSetOption(a->B,op);
1466   } else if (op == MAT_ROWS_SORTED ||
1467              op == MAT_ROWS_UNSORTED ||
1468              op == MAT_SYMMETRIC ||
1469              op == MAT_STRUCTURALLY_SYMMETRIC ||
1470              op == MAT_YES_NEW_DIAGONALS)
1472     PLogInfo(A,"MatSetOption_MPIBAIJ:Option ignored\n");
1473   else if (op == MAT_COLUMN_ORIENTED) {
1474     a->roworiented = 0;
1475     MatSetOption(a->A,op);
1476     MatSetOption(a->B,op);
1477   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1478     a->donotstash = 1;
1479   } else if (op == MAT_NO_NEW_DIAGONALS) {
1480     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1481   } else if (op == MAT_USE_HASH_TABLE) {
1482     a->ht_flag = 1;
1483   } else {
1484     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1485   }
1486   PetscFunctionReturn(0);
1487 }
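
/*
   Typical use (an illustrative sketch; A and ierr assumed): options are set
   after the matrix is created and before assembly, e.g. to drop off-processor
   entries in MatSetValues() and to disallow new nonzero locations.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES); CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS); CHKERRQ(ierr);
*/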
1488 
1489 #undef __FUNC__
1490 #define __FUNC__ "MatTranspose_MPIBAIJ"
1491 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1492 {
1493   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1494   Mat_SeqBAIJ *Aloc;
1495   Mat        B;
1496   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1497   int        bs=baij->bs,mbs=baij->mbs;
1498   Scalar     *a;
1499 
1500   PetscFunctionBegin;
1501   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1502   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1503   CHKERRQ(ierr);
1504 
1505   /* copy over the A part */
1506   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1507   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1508   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1509 
1510   for ( i=0; i<mbs; i++ ) {
1511     rvals[0] = bs*(baij->rstart + i);
1512     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1513     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1514       col = (baij->cstart+aj[j])*bs;
1515       for (k=0; k<bs; k++ ) {
1516         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1517         col++; a += bs;
1518       }
1519     }
1520   }
1521   /* copy over the B part */
1522   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1523   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1524   for ( i=0; i<mbs; i++ ) {
1525     rvals[0] = bs*(baij->rstart + i);
1526     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1527     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1528       col = baij->garray[aj[j]]*bs;
1529       for (k=0; k<bs; k++ ) {
1530         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1531         col++; a += bs;
1532       }
1533     }
1534   }
1535   PetscFree(rvals);
1536   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1537   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1538 
1539   if (matout != PETSC_NULL) {
1540     *matout = B;
1541   } else {
1542     PetscOps       *Abops;
1543     struct _MatOps *Aops;
1544 
1545     /* This isn't really an in-place transpose, but we free the data structures from baij */
1546     PetscFree(baij->rowners);
1547     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1548     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1549     if (baij->colmap) PetscFree(baij->colmap);
1550     if (baij->garray) PetscFree(baij->garray);
1551     if (baij->lvec) VecDestroy(baij->lvec);
1552     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1553     PetscFree(baij);
1554 
1555     /*
1556        This is horrible, horrible code. We need to keep the
1557       A pointers for the bops and ops but copy everything
1558       else from B.
1559     */
1560     Abops = A->bops;
1561     Aops  = A->ops;
1562     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1563     A->bops = Abops;
1564     A->ops  = Aops;
1565 
1566     PetscHeaderDestroy(B);
1567   }
1568   PetscFunctionReturn(0);
1569 }
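
/*
   Example calls (illustrative; A and ierr assumed): form the transpose in a
   new matrix, or transpose a square matrix in place by passing PETSC_NULL.

     Mat At;

     ierr = MatTranspose(A,&At); CHKERRQ(ierr);
   or, for a square matrix only,
     ierr = MatTranspose(A,PETSC_NULL); CHKERRQ(ierr);
*/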
1570 
1571 #undef __FUNC__
1572 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1573 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1574 {
1575   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1576   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1577   int ierr,s1,s2,s3;
1578 
1579   PetscFunctionBegin;
1580   if (ll)  {
1581     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1582     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1583     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1584     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1585     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1586   }
1587   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1588   PetscFunctionReturn(0);
1589 }
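
/*
   Example (a sketch; A, l and ierr assumed): only left scaling is supported
   here, so the right vector must be PETSC_NULL; l must have the same local
   size as the matrix rows.

     ierr = MatDiagonalScale(A,l,PETSC_NULL); CHKERRQ(ierr);
*/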
1590 
1591 #undef __FUNC__
1592 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1593 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1594 {
1595   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1596   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1597   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1598   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1599   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1600   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1601   MPI_Comm       comm = A->comm;
1602   MPI_Request    *send_waits,*recv_waits;
1603   MPI_Status     recv_status,*send_status;
1604   IS             istmp;
1605 
1606   PetscFunctionBegin;
1607   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1608   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1609 
1610   /*  first count number of contributors to each processor */
1611   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1612   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1613   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1614   for ( i=0; i<N; i++ ) {
1615     idx = rows[i];
1616     found = 0;
1617     for ( j=0; j<size; j++ ) {
1618       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1619         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1620       }
1621     }
1622     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1623   }
1624   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1625 
1626   /* inform other processors of number of messages and max length*/
1627   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1628   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1629   nrecvs = work[rank];
1630   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1631   nmax   = work[rank];
1632   PetscFree(work);
1633 
1634   /* post receives:   */
1635   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1636   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1637   for ( i=0; i<nrecvs; i++ ) {
1638     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1639   }
1640 
1641   /* do sends:
1642      1) starts[i] gives the starting index in svalues for stuff going to
1643      the ith processor
1644   */
1645   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1646   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1647   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1648   starts[0] = 0;
1649   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1650   for ( i=0; i<N; i++ ) {
1651     svalues[starts[owner[i]]++] = rows[i];
1652   }
1653   ISRestoreIndices(is,&rows);
1654 
1655   starts[0] = 0;
1656   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1657   count = 0;
1658   for ( i=0; i<size; i++ ) {
1659     if (procs[i]) {
1660       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1661     }
1662   }
1663   PetscFree(starts);
1664 
1665   base = owners[rank]*bs;
1666 
1667   /*  wait on receives */
1668   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1669   source = lens + nrecvs;
1670   count  = nrecvs; slen = 0;
1671   while (count) {
1672     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1673     /* unpack receives into our local space */
1674     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1675     source[imdex]  = recv_status.MPI_SOURCE;
1676     lens[imdex]  = n;
1677     slen += n;
1678     count--;
1679   }
1680   PetscFree(recv_waits);
1681 
1682   /* unpack the received row indices into a local list, relative to this processor's first row */
1683   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1684   count = 0;
1685   for ( i=0; i<nrecvs; i++ ) {
1686     values = rvalues + i*nmax;
1687     for ( j=0; j<lens[i]; j++ ) {
1688       lrows[count++] = values[j] - base;
1689     }
1690   }
1691   PetscFree(rvalues); PetscFree(lens);
1692   PetscFree(owner); PetscFree(nprocs);
1693 
1694   /* actually zap the local rows */
1695   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1696   PLogObjectParent(A,istmp);
1697 
1698   ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1699   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1700   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1701 
1702   if (diag) {
1703     for ( i = 0; i < slen; i++ ) {
1704       row = lrows[i] + rstart_bs;
1705       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1706     }
1707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1709   }
1710   PetscFree(lrows);
1711 
1712   /* wait on sends */
1713   if (nsends) {
1714     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1715     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1716     PetscFree(send_status);
1717   }
1718   PetscFree(send_waits); PetscFree(svalues);
1719 
1720   PetscFunctionReturn(0);
1721 }
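
/*
   Example use (a sketch; A, nlocal, rows and ierr are assumed): zero the rows
   listed in an index set and put 1.0 on their diagonal.

     IS     is;
     Scalar one = 1.0;

     ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,rows,&is); CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one); CHKERRQ(ierr);
     ierr = ISDestroy(is); CHKERRQ(ierr);
*/
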
1722 extern int MatPrintHelp_SeqBAIJ(Mat);
1723 #undef __FUNC__
1724 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1725 int MatPrintHelp_MPIBAIJ(Mat A)
1726 {
1727   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1728   MPI_Comm    comm = A->comm;
1729   static int  called = 0;
1730   int         ierr;
1731 
1732   PetscFunctionBegin;
1733   if (!a->rank) {
1734     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1735   }
1736   if (called) {PetscFunctionReturn(0);} else called = 1;
1737   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1738   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNC__
1743 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1744 int MatSetUnfactored_MPIBAIJ(Mat A)
1745 {
1746   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1747   int         ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
1755 
1756 /* -------------------------------------------------------------------*/
1757 static struct _MatOps MatOps = {
1758   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
1759   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
1760   0,0,0,0,
1761   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
1762   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
1763   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
1764   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
1765   0,0,0,MatGetSize_MPIBAIJ,
1766   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1767   0,0,MatConvertSameType_MPIBAIJ,0,0,
1768   0,0,0,MatGetSubMatrices_MPIBAIJ,
1769   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1770   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1771   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
1772 
1773 
1774 #undef __FUNC__
1775 #define __FUNC__ "MatCreateMPIBAIJ"
1776 /*@C
1777    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1778    (block compressed row).  For good matrix assembly performance
1779    the user should preallocate the matrix storage by setting the parameters
1780    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1781    performance can be increased by more than a factor of 50.
1782 
1783    Input Parameters:
1784 .  comm - MPI communicator
1785 .  bs   - size of block
1786 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1787            This value should be the same as the local size used in creating the
1788            y vector for the matrix-vector product y = Ax.
1789 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1790            This value should be the same as the local size used in creating the
1791            x vector for the matrix-vector product y = Ax.
1792 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1793 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1794 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1795            submatrix  (same for all local rows)
1796 .  d_nnz - array containing the number of block nonzeros in the various block rows
1797            of the diagonal portion of the local submatrix (possibly different for each
1798            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if
1799            it is zero.
1800 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1801            submatrix (same for all local rows).
1802 .  o_nnz - array containing the number of block nonzeros in the various block rows of the
1803            off-diagonal portion of the local submatrix (possibly different for
1804            each block row) or PETSC_NULL.
1805 
1806    Output Parameter:
1807 .  A - the matrix
1808 
1809    Notes:
1810    The user MUST specify either the local or global matrix dimensions
1811    (possibly both).
1812 
1813    Storage Information:
1814    For a square global matrix we define each processor's diagonal portion
1815    to be its local rows and the corresponding columns (a square submatrix);
1816    each processor's off-diagonal portion encompasses the remainder of the
1817    local matrix (a rectangular submatrix).
1818 
1819    The user can specify preallocated storage for the diagonal part of
1820    the local submatrix with either d_nz or d_nnz (not both).  Set
1821    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1822    memory allocation.  Likewise, specify preallocated storage for the
1823    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1824 
1825    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1826    the figure below we depict these three local rows and all columns (0-11).
1827 
1828 $          0 1 2 3 4 5 6 7 8 9 10 11
1829 $         -------------------
1830 $  row 3  |  o o o d d d o o o o o o
1831 $  row 4  |  o o o d d d o o o o o o
1832 $  row 5  |  o o o d d d o o o o o o
1833 $         -------------------
1834 $
1835 
1836    Thus, any entries in the d locations are stored in the d (diagonal)
1837    submatrix, and any entries in the o locations are stored in the
1838    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1839    stored simply in the MATSEQBAIJ format for compressed row storage.
1840 
1841    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1842    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1843    In general, for PDE problems in which most nonzeros are near the diagonal,
1844    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1845    or you will get TERRIBLE performance; see the users' manual chapter on
1846    matrices.
1847 
1848 .keywords: matrix, block, aij, compressed row, sparse, parallel
1849 
1850 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
1851 @*/
1852 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1853                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1854 {
1855   Mat          B;
1856   Mat_MPIBAIJ  *b;
1857   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1858 
1859   PetscFunctionBegin;
1860   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1861 
1862   MPI_Comm_size(comm,&size);
1863   if (size == 1) {
1864     if (M == PETSC_DECIDE) M = m;
1865     if (N == PETSC_DECIDE) N = n;
1866     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1867     PetscFunctionReturn(0);
1868   }
1869 
1870   *A = 0;
1871   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
1872   PLogObjectCreate(B);
1873   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
1874   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1875   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1876 
1877   B->ops->destroy    = MatDestroy_MPIBAIJ;
1878   B->ops->view       = MatView_MPIBAIJ;
1879   B->mapping    = 0;
1880   B->factor     = 0;
1881   B->assembled  = PETSC_FALSE;
1882 
1883   B->insertmode = NOT_SET_VALUES;
1884   MPI_Comm_rank(comm,&b->rank);
1885   MPI_Comm_size(comm,&b->size);
1886 
1887   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1888     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1889   }
1890   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1891     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1892   }
1893   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1894     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1895   }
1896   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1897   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
1898 
1899   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1900     work[0] = m; work[1] = n;
1901     mbs = m/bs; nbs = n/bs;
1902     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1903     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
1904     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
1905   }
1906   if (m == PETSC_DECIDE) {
1907     Mbs = M/bs;
1908     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
1909     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
1910     m   = mbs*bs;
1911   }
1912   if (n == PETSC_DECIDE) {
1913     Nbs = N/bs;
1914     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
1915     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
1916     n   = nbs*bs;
1917   }
1918   if (mbs*bs != m || nbs*bs != n) {
1919     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1920   }
1921 
1922   b->m = m; B->m = m;
1923   b->n = n; B->n = n;
1924   b->N = N; B->N = N;
1925   b->M = M; B->M = M;
1926   b->bs  = bs;
1927   b->bs2 = bs*bs;
1928   b->mbs = mbs;
1929   b->nbs = nbs;
1930   b->Mbs = Mbs;
1931   b->Nbs = Nbs;
1932 
1933   /* build local table of row and column ownerships */
1934   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1935   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1936   b->cowners = b->rowners + b->size + 2;
1937   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1938   b->rowners[0] = 0;
1939   for ( i=2; i<=b->size; i++ ) {
1940     b->rowners[i] += b->rowners[i-1];
1941   }
1942   b->rstart    = b->rowners[b->rank];
1943   b->rend      = b->rowners[b->rank+1];
1944   b->rstart_bs = b->rstart * bs;
1945   b->rend_bs   = b->rend * bs;
1946 
1947   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1948   b->cowners[0] = 0;
1949   for ( i=2; i<=b->size; i++ ) {
1950     b->cowners[i] += b->cowners[i-1];
1951   }
1952   b->cstart    = b->cowners[b->rank];
1953   b->cend      = b->cowners[b->rank+1];
1954   b->cstart_bs = b->cstart * bs;
1955   b->cend_bs   = b->cend * bs;
1956 
1957 
1958   if (d_nz == PETSC_DEFAULT) d_nz = 5;
1959   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
1960   PLogObjectParent(B,b->A);
1961   if (o_nz == PETSC_DEFAULT) o_nz = 0;
1962   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
1963   PLogObjectParent(B,b->B);
1964 
1965   /* build cache for off array entries formed */
1966   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
1967   b->donotstash  = 0;
1968   b->colmap      = 0;
1969   b->garray      = 0;
1970   b->roworiented = 1;
1971 
1972   /* stuff used in block assembly */
1973   b->barray       = 0;
1974 
1975   /* stuff used for matrix vector multiply */
1976   b->lvec         = 0;
1977   b->Mvctx        = 0;
1978 
1979   /* stuff for MatGetRow() */
1980   b->rowindices   = 0;
1981   b->rowvalues    = 0;
1982   b->getrowactive = PETSC_FALSE;
1983 
1984   /* hash table stuff */
1985   b->ht           = 0;
1986   b->hd           = 0;
1987   b->ht_size      = 0;
1988   b->ht_flag      = 0;
1989   b->ht_fact      = 0;
1990   b->ht_total_ct  = 0;
1991   b->ht_insert_ct = 0;
1992 
1993   *A = B;
1994   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
1995   if (flg) {
1996     double fact = 1.39;
1997     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
1998     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
1999     if (fact <= 1.0) fact = 1.39;
2000     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2001     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2002   }
2003   PetscFunctionReturn(0);
2004 }
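
/*
   Usage sketch (illustrative values; ierr assumed): create a matrix with 2x2
   blocks, letting PETSc compute the global sizes from the local ones, with a
   rough preallocation of 3 diagonal-part and 1 off-diagonal-part block
   nonzeros per block row.

     Mat A;
     int bs = 2, mbs = 10;   (local number of block rows, an assumption)

     ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,mbs*bs,mbs*bs,PETSC_DECIDE,PETSC_DECIDE,
                             3,PETSC_NULL,1,PETSC_NULL,&A); CHKERRQ(ierr);
        ... insert entries with MatSetValues()/MatSetValuesBlocked() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
*/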
2005 
2006 #undef __FUNC__
2007 #define __FUNC__ "MatConvertSameType_MPIBAIJ"
2008 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
2009 {
2010   Mat         mat;
2011   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2012   int         ierr, len=0, flg;
2013 
2014   PetscFunctionBegin;
2015   *newmat       = 0;
2016   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
2017   PLogObjectCreate(mat);
2018   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2019   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
2020   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2021   mat->ops->view       = MatView_MPIBAIJ;
2022   mat->factor     = matin->factor;
2023   mat->assembled  = PETSC_TRUE;
2024 
2025   a->m = mat->m   = oldmat->m;
2026   a->n = mat->n   = oldmat->n;
2027   a->M = mat->M   = oldmat->M;
2028   a->N = mat->N   = oldmat->N;
2029 
2030   a->bs  = oldmat->bs;
2031   a->bs2 = oldmat->bs2;
2032   a->mbs = oldmat->mbs;
2033   a->nbs = oldmat->nbs;
2034   a->Mbs = oldmat->Mbs;
2035   a->Nbs = oldmat->Nbs;
2036 
2037   a->rstart       = oldmat->rstart;
2038   a->rend         = oldmat->rend;
2039   a->cstart       = oldmat->cstart;
2040   a->cend         = oldmat->cend;
2041   a->size         = oldmat->size;
2042   a->rank         = oldmat->rank;
2043   mat->insertmode = NOT_SET_VALUES;
2044   a->rowvalues    = 0;
2045   a->getrowactive = PETSC_FALSE;
2046   a->barray       = 0;
2047 
2048   /* hash table stuff */
2049   a->ht           = 0;
2050   a->hd           = 0;
2051   a->ht_size      = 0;
2052   a->ht_flag      = oldmat->ht_flag;
2053   a->ht_fact      = oldmat->ht_fact;
2054   a->ht_total_ct  = 0;
2055   a->ht_insert_ct = 0;
2056 
2057 
2058   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2059   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2060   a->cowners = a->rowners + a->size + 2;
2061   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2062   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
2063   if (oldmat->colmap) {
2064     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2065     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2066     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2067   } else a->colmap = 0;
2068   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2069     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2070     PLogObjectMemory(mat,len*sizeof(int));
2071     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2072   } else a->garray = 0;
2073 
2074   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2075   PLogObjectParent(mat,a->lvec);
2076   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2077   PLogObjectParent(mat,a->Mvctx);
2078   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
2079   PLogObjectParent(mat,a->A);
2080   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
2081   PLogObjectParent(mat,a->B);
2082   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2083   if (flg) {
2084     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2085   }
2086   *newmat = mat;
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #include "sys.h"
2091 
2092 #undef __FUNC__
2093 #define __FUNC__ "MatLoad_MPIBAIJ"
2094 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2095 {
2096   Mat          A;
2097   int          i, nz, ierr, j,rstart, rend, fd;
2098   Scalar       *vals,*buf;
2099   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2100   MPI_Status   status;
2101   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2102   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2103   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2104   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2105   int          dcount,kmax,k,nzcount,tmp;
2106 
2107   PetscFunctionBegin;
2108   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2109 
2110   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2111   if (!rank) {
2112     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2113     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2114     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2115     if (header[3] < 0) {
2116       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2117     }
2118   }
2119 
2120   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2121   M = header[1]; N = header[2];
2122 
2123   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2124 
2125   /*
2126      This code adds extra rows to make sure the number of rows is
2127      divisible by the blocksize
2128   */
2129   Mbs        = M/bs;
2130   extra_rows = bs - M + bs*(Mbs);
2131   if (extra_rows == bs) extra_rows = 0;
2132   else                  Mbs++;
2133   if (extra_rows && !rank) {
2134     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2135   }
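  /* For example (illustrative): M = 10 with bs = 3 gives Mbs = 4 and
     extra_rows = 2, so the loaded matrix is padded to 12 rows; each padded
     row receives a single diagonal entry of 1.0 below. */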
2136 
2137   /* determine ownership of all rows */
2138   mbs = Mbs/size + ((Mbs % size) > rank);
2139   m   = mbs * bs;
2140   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2141   browners = rowners + size + 1;
2142   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2143   rowners[0] = 0;
2144   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2145   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2146   rstart = rowners[rank];
2147   rend   = rowners[rank+1];
2148 
2149   /* distribute row lengths to all processors */
2150   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
2151   if (!rank) {
2152     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2153     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2154     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2155     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2156     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2157     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2158     PetscFree(sndcounts);
2159   } else {
2160     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2161   }
2162 
2163   if (!rank) {
2164     /* calculate the number of nonzeros on each processor */
2165     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2166     PetscMemzero(procsnz,size*sizeof(int));
2167     for ( i=0; i<size; i++ ) {
2168       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2169         procsnz[i] += rowlengths[j];
2170       }
2171     }
2172     PetscFree(rowlengths);
2173 
2174     /* determine max buffer needed and allocate it */
2175     maxnz = 0;
2176     for ( i=0; i<size; i++ ) {
2177       maxnz = PetscMax(maxnz,procsnz[i]);
2178     }
2179     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2180 
2181     /* read in my part of the matrix column indices  */
2182     nz = procsnz[0];
2183     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2184     mycols = ibuf;
2185     if (size == 1)  nz -= extra_rows;
2186     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2187     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2188 
2189     /* read in the column indices for the other processors (except the last one) and ship them off */
2190     for ( i=1; i<size-1; i++ ) {
2191       nz   = procsnz[i];
2192       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2193       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2194     }
2195     /* read in the stuff for the last proc */
2196     if ( size != 1 ) {
2197       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2198       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2199       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2200       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2201     }
2202     PetscFree(cols);
2203   } else {
2204     /* determine buffer space needed for message */
2205     nz = 0;
2206     for ( i=0; i<m; i++ ) {
2207       nz += locrowlens[i];
2208     }
2209     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2210     mycols = ibuf;
2211     /* receive message of column indices*/
2212     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2213     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2214     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2215   }
2216 
2217   /* loop over local rows, determining number of off diagonal entries */
2218   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2219   odlens = dlens + (rend-rstart);
2220   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2221   PetscMemzero(mask,3*Mbs*sizeof(int));
2222   masked1 = mask    + Mbs;
2223   masked2 = masked1 + Mbs;
2224   rowcount = 0; nzcount = 0;
2225   for ( i=0; i<mbs; i++ ) {
2226     dcount  = 0;
2227     odcount = 0;
2228     for ( j=0; j<bs; j++ ) {
2229       kmax = locrowlens[rowcount];
2230       for ( k=0; k<kmax; k++ ) {
2231         tmp = mycols[nzcount++]/bs;
2232         if (!mask[tmp]) {
2233           mask[tmp] = 1;
2234           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2235           else masked1[dcount++] = tmp;
2236         }
2237       }
2238       rowcount++;
2239     }
2240 
2241     dlens[i]  = dcount;
2242     odlens[i] = odcount;
2243 
2244     /* zero out the mask elements we set */
2245     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2246     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2247   }
2248 
2249   /* create our matrix */
2250   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2251          CHKERRQ(ierr);
2252   A = *newmat;
2253   MatSetOption(A,MAT_COLUMNS_SORTED);
2254 
2255   if (!rank) {
2256     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
2257     /* read in my part of the matrix numerical values  */
2258     nz = procsnz[0];
2259     vals = buf;
2260     mycols = ibuf;
2261     if (size == 1)  nz -= extra_rows;
2262     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2263     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2264 
2265     /* insert into matrix */
2266     jj      = rstart*bs;
2267     for ( i=0; i<m; i++ ) {
2268       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2269       mycols += locrowlens[i];
2270       vals   += locrowlens[i];
2271       jj++;
2272     }
2273     /* read in the values for the other processors (except the last one) and ship them out */
2274     for ( i=1; i<size-1; i++ ) {
2275       nz   = procsnz[i];
2276       vals = buf;
2277       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2278       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2279     }
2280     /* the last proc */
2281     if ( size != 1 ){
2282       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2283       vals = buf;
2284       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2285       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2286       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2287     }
2288     PetscFree(procsnz);
2289   } else {
2290     /* receive numeric values */
2291     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
2292 
2293     /* receive message of values*/
2294     vals   = buf;
2295     mycols = ibuf;
2296     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2297     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2298     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2299 
2300     /* insert into matrix */
2301     jj      = rstart*bs;
2302     for ( i=0; i<m; i++ ) {
2303       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2304       mycols += locrowlens[i];
2305       vals   += locrowlens[i];
2306       jj++;
2307     }
2308   }
2309   PetscFree(locrowlens);
2310   PetscFree(buf);
2311   PetscFree(ibuf);
2312   PetscFree(rowners);
2313   PetscFree(dlens);
2314   PetscFree(mask);
2315   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2316   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
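
/*
   Typical calling sequence (a sketch; the file name and variable names are
   assumptions): load a binary matrix written with MatView(), selecting the
   block size with the -matload_block_size option.

     Viewer viewer;
     Mat    A;

     ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer); CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A); CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/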
2319 
2320 
2321 
2322 #undef __FUNC__
2323 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2324 /*@
2325    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2326 
2327    Input Parameters:
2328 .  mat  - the matrix
2329 .  fact - factor
2330 .  fact - factor used in computing the size of the hash table
2331    Notes:
2332    This can also be set by the command line option: -mat_use_hash_table fact
2333 
2334 .keywords: matrix, hashtable, factor, HT
2335 
2336 .seealso: MatSetOption()
2337 @*/
2338 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2339 {
2340   Mat_MPIBAIJ *baij;
2341 
2342   PetscFunctionBegin;
2343   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2344   if (mat->type != MATMPIBAIJ) {
2345       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2346   }
2347   baij = (Mat_MPIBAIJ*) mat->data;
2348   baij->ht_fact = fact;
2349   PetscFunctionReturn(0);
2350 }
2351
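
/*
   Example (a sketch; A and ierr assumed): enable hash table based assembly and
   set the factor explicitly.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6); CHKERRQ(ierr);
*/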