xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 570da9069dc1712c19dfdfeb8bba039bcc9668b9)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.121 1998/05/11 21:55:16 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 
15 /*
16      Local utility routine that creates a mapping from the global column
17    number to the local number in the off-diagonal part of the local
18    storage of the matrix.  This is done in a non scalable way since the
19    length of colmap equals the global matrix length.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
23 static int CreateColmap_MPIBAIJ_Private(Mat mat)
24 {
25   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
26   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27   int         nbs = B->nbs,i,bs=B->bs;;
28 
29   PetscFunctionBegin;
30   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
31   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
32   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
34   PetscFunctionReturn(0);
35 }
36 
37 #define CHUNKSIZE  10
38 
/*
   MatSetValues_SeqBAIJ_A_Private - Macro that inserts or adds one scalar
   value into the "diagonal" sequential block A of the parallel matrix.

   It relies on local variables declared by the caller (see
   MatSetValues_MPIBAIJ): brow,bcol,ridx,cidx,low,high,t,_i,ii,nrow,rmax,N,
   rp,ap,bap and the cached arrays aa/ai/aj/ailen/aimax of a = A->data.

   Strategy: narrow the search range with a bisection loop (while high-low
   is > 3), then scan linearly for the block column.  If found, the value is
   written/accumulated in place.  If not found and insertion is permitted
   (a->nonew), the row is opened up by shifting later entries; when the row
   is full, storage for the whole matrix is reallocated with CHUNKSIZE extra
   block slots inserted after the current row.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* entry exists: blocks are stored column-major within each bs2 slot */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one allocation holds values, j and i arrays */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
113 
/*
   MatSetValues_SeqBAIJ_B_Private - Identical in structure to
   MatSetValues_SeqBAIJ_A_Private (see above), but operates on the
   off-diagonal sequential block B, using the caller-cached arrays
   ba/bi/bj/bilen/bimax of b = B->data and its own b_noinsert label.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* entry exists: blocks are stored column-major within each bs2 slot */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one allocation holds values, j and i arrays */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
188 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts or adds individual (point, not blocked)
   values into the parallel block matrix.  Values whose (point) row is owned
   locally are written straight into the diagonal block A or the
   off-diagonal block B through the MatSetValues_SeqBAIJ_{A,B}_Private
   macros; values in non-local rows are stashed for communication during
   assembly (unless donotstash is set).

   Input:
     mat   - the matrix
     m,im  - number of rows and their global (point) row indices
     n,in  - number of columns and their global (point) column indices
     v     - the values, row- or column-oriented per baij->roworiented
     addv  - either ADD_VALUES or INSERT_VALUES
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned by this process */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap stores local index + 1; zero means column absent, so col may be < 0 */
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new block column in an assembled matrix: revert to unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* non-local row: stash the values for assembly-time communication */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
270 
271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272 #undef __FUNC__
273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275 {
276   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
277   Scalar      *value,*barray=baij->barray;
278   int         ierr,i,j,ii,jj,row,col,k,l;
279   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280   int         rend=baij->rend,cstart=baij->cstart,stepval;
281   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282 
283   if(!barray) {
284     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
285   }
286 
287   if (roworiented) {
288     stepval = (n-1)*bs;
289   } else {
290     stepval = (m-1)*bs;
291   }
292   for ( i=0; i<m; i++ ) {
293 #if defined(USE_PETSC_BOPT_g)
294     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296 #endif
297     if (im[i] >= rstart && im[i] < rend) {
298       row = im[i] - rstart;
299       for ( j=0; j<n; j++ ) {
300         /* If NumCol = 1 then a copy is not required */
301         if ((roworiented) && (n == 1)) {
302           barray = v + i*bs2;
303         } else if((!roworiented) && (m == 1)) {
304           barray = v + j*bs2;
305         } else { /* Here a copy is required */
306           if (roworiented) {
307             value = v + i*(stepval+bs)*bs + j*bs;
308           } else {
309             value = v + j*(stepval+bs)*bs + i*bs;
310           }
311           for ( ii=0; ii<bs; ii++,value+=stepval ) {
312             for (jj=0; jj<bs; jj++ ) {
313               *barray++  = *value++;
314             }
315           }
316           barray -=bs2;
317         }
318 
319         if (in[j] >= cstart && in[j] < cend){
320           col  = in[j] - cstart;
321           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322         }
323 #if defined(USE_PETSC_BOPT_g)
324         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326 #endif
327         else {
328           if (mat->was_assembled) {
329             if (!baij->colmap) {
330               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331             }
332 
333 #if defined(USE_PETSC_BOPT_g)
334             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335 #endif
336             col = (baij->colmap[in[j]] - 1)/bs;
337             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339               col =  in[j];
340             }
341           }
342           else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (!baij->donotstash) {
348         if (roworiented ) {
349           row   = im[i]*bs;
350           value = v + i*(stepval+bs)*bs;
351           for ( j=0; j<bs; j++,row++ ) {
352             for ( k=0; k<n; k++ ) {
353               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355               }
356             }
357           }
358         } else {
359           for ( j=0; j<n; j++ ) {
360             value = v + j*(stepval+bs)*bs + i*bs;
361             col   = in[j]*bs;
362             for ( k=0; k<bs; k++,col++,value+=stepval) {
363               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365               }
366             }
367           }
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 #include <math.h>
375 #define HASH_KEY 0.6180339887
376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ.
   Instead of searching the BAIJ row storage, each bs x bs block's address
   is looked up in the hash table (baij->ht/baij->hd); only pre-existing
   entries can be modified -- a missing entry is an error.  Non-local rows
   are stashed as in MatSetValues_MPIBAIJ.

   The key is the 1-based linearized block index (brow*Nbs + bcol + 1);
   collisions are resolved by linear probing, scanning from the hashed slot
   to the end of the table and then wrapping around to the beginning.
   With BOPT=g the probe counters ht_total_ct/ht_insert_ct are maintained.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap around to slot 0 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap around to slot 0 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* non-local row: stash the values for assembly-time communication */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
458 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of
   MatSetValuesBlocked_MPIBAIJ.  Locates each bs x bs block's storage
   address through the hash table (key = brow*Nbs + bcol + 1, linear
   probing with wraparound, same scheme as MatSetValues_MPIBAIJ_HT) and
   copies/accumulates the block directly; only pre-existing entries can be
   modified.  Non-local block rows are stashed point-by-point.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probe to end of table, then wrap around to slot 0 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap around to slot 0 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* copy the bs x bs block straight into the stored block's memory */
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* non-local block row: stash the values entry by entry */
      if (!baij->donotstash) {
        if (roworiented ) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
588 
589 #undef __FUNC__
590 #define __FUNC__ "MatGetValues_MPIBAIJ"
591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
592 {
593   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
594   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
595   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
596 
597   PetscFunctionBegin;
598   for ( i=0; i<m; i++ ) {
599     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
600     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
601     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
602       row = idxm[i] - bsrstart;
603       for ( j=0; j<n; j++ ) {
604         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
605         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
606         if (idxn[j] >= bscstart && idxn[j] < bscend){
607           col = idxn[j] - bscstart;
608           ierr = MatGetValues(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
609         } else {
610           if (!baij->colmap) {
611             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
612           }
613           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
614              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
615           else {
616             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
617             ierr = MatGetValues(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
618           }
619         }
620       }
621     } else {
622       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
623     }
624   }
625  PetscFunctionReturn(0);
626 }
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatNorm_MPIBAIJ"
630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
631 {
632   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
633   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
634   int        ierr, i,bs2=baij->bs2;
635   double     sum = 0.0;
636   Scalar     *v;
637 
638   PetscFunctionBegin;
639   if (baij->size == 1) {
640     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
641   } else {
642     if (type == NORM_FROBENIUS) {
643       v = amat->a;
644       for (i=0; i<amat->nz*bs2; i++ ) {
645 #if defined(USE_PETSC_COMPLEX)
646         sum += real(conj(*v)*(*v)); v++;
647 #else
648         sum += (*v)*(*v); v++;
649 #endif
650       }
651       v = bmat->a;
652       for (i=0; i<bmat->nz*bs2; i++ ) {
653 #if defined(USE_PETSC_COMPLEX)
654         sum += real(conj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
660       *norm = sqrt(*norm);
661     } else {
662       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
663     }
664   }
665   PetscFunctionReturn(0);
666 }
667 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Starts communicating the stashed
   off-processor values: counts contributions per destination process,
   exchanges message counts/lengths with MPI_Allreduce, posts the
   receives and sends ((index,index,value) triples packed as Scalars),
   then frees the stash.  The outstanding requests and buffers are saved
   in the Mat_MPIBAIJ struct for MatAssemblyEnd to complete.
*/
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
  int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (baij->donotstash) {
    /* nothing was stashed: record empty communication state and return */
    baij->svalues    = 0; baij->rvalues    = 0;
    baij->nsends     = 0; baij->nrecvs     = 0;
    baij->send_waits = 0; baij->recv_waits = 0;
    baij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    /* rowners holds block-row ownership ranges, hence the *bs scaling */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax      = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack each stashed entry as the triple (row index, col index, value) */
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts (the packing loop above advanced them) */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  /* save communication state for MatAssemblyEnd */
  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}
772 
773 /*
774   Creates the hash table, and sets the table
775   This table is created only once.
776   If new entries need to be added to the matrix
777   then the hash table has to be destroyed and
778   recreated.
779 */
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
/*
   MatCreateHashTable_MPIBAIJ_Private - Builds an open-addressing (linear
   probing) hash table mapping the global block entry (row*Nbs + col + 1)
   to the address of that block's values, covering both the diagonal (A)
   and off-diagonal (B) local submatrices.

   Input Parameters:
     mat    - the MPIBAIJ matrix
     factor - table size as a multiple of the local number of nonzero blocks

   Notes:
     The table is built only once; if it already exists this returns
     immediately.  HD (value pointers) and HT (integer keys) share a single
     allocation: HT points just past the HD segment.  Keys are offset by +1
     so that 0 can mean "empty slot".
*/
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         A = baij->A, B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  Scalar      **HD;
  double      tmp;
#if defined(USE_PETSC_BOPT_g)
  int         ct=0,max=0;  /* probe statistics, debug builds only */
#endif

  PetscFunctionBegin;
  /* table size is factor * (local nonzero blocks of A and B combined) */
  baij->ht_size=(int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table: one chunk for both HD and HT */
  baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
  baij->ht = (int*)(baij->hd + size);
  HD = baij->hd;
  HT = baij->ht;


  /* zero both segments (HD and the trailing HT) in one call */
  PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));


  /* Loop Over A: keys are built from global block row/column indices */
  for ( i=0; i<a->mbs; i++ ) {
    for ( j=ai[i]; j<ai[i+1]; j++ ) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;      /* +1 so key 0 marks an empty slot */
      h1  = HASH(size,key,tmp);
      /* linear probe for the first free slot */
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {  /* note: int slot compared against 0.0 */
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;  /* pointer to the bs2 values of this block */
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: global block column recovered through garray */
  for ( i=0; i<b->mbs; i++ ) {
    for ( j=bi[i]; j<bi[i+1]; j++ ) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for ( k=0; k<size; k++ ){
        if (HT[(h1+k)%size] == 0.0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(USE_PETSC_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(USE_PETSC_BOPT_g)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary: average and maximum probe lengths (debug builds) */
#if defined(USE_PETSC_BOPT_g)
  for ( i=0,j=0; i<size; i++)
    if (HT[i]) {j++;}
  PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
           (j== 0)? 0.0:((double)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
872 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: drains the stash messages
   posted by MatAssemblyBegin (each entry is a (row,col,value) triple packed
   into Scalars), inserts them locally, frees the communication buffers, and
   finishes assembly of the sequential A and B parts.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
  int         bs=baij->bs,row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space; each message is n triples */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      /* global row converted to a local (point) row index */
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        /* column lies in the diagonal block: insert into A with local column */
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      } else {
        if (mat->was_assembled) {
          /* translate global column to B's compressed column numbering */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = (baij->colmap[col/bs]) - 1 + col%bs;
          /* col < 0 means this block column is not yet present in B;
             unless new nonzeros are forbidden, disassemble and retry with
             the original global column */
          if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      }
    }
    count--;
  }
  if (baij->recv_waits) PetscFree(baij->recv_waits);
  if (baij->rvalues)    PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (baij->send_waits) PetscFree(baij->send_waits);
  if (baij->svalues)    PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble.
     (MPI_PROD of was_assembled is 0 if any process disassembled.) */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on final assembly, switch to hash-table-based MatSetValues if requested */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
960 
961 #undef __FUNC__
962 #define __FUNC__ "MatView_MPIBAIJ_Binary"
963 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
964 {
965   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
966   int          ierr;
967 
968   PetscFunctionBegin;
969   if (baij->size == 1) {
970     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
971   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNC__
976 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
977 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
978 {
979   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
980   int          ierr, format,rank,bs = baij->bs;
981   FILE         *fd;
982   ViewerType   vtype;
983 
984   PetscFunctionBegin;
985   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
986   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
987     ierr = ViewerGetFormat(viewer,&format);
988     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
989       MatInfo info;
990       MPI_Comm_rank(mat->comm,&rank);
991       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
992       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
993       PetscSequentialPhaseBegin(mat->comm,1);
994       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
995               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
996               baij->bs,(int)info.memory);
997       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
998       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
999       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
1000       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1001       fflush(fd);
1002       PetscSequentialPhaseEnd(mat->comm,1);
1003       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
1004       PetscFunctionReturn(0);
1005     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1006       PetscPrintf(mat->comm,"  block size is %d\n",bs);
1007       PetscFunctionReturn(0);
1008     }
1009   }
1010 
1011   if (vtype == DRAW_VIEWER) {
1012     Draw       draw;
1013     PetscTruth isnull;
1014     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
1015     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1016   }
1017 
1018   if (vtype == ASCII_FILE_VIEWER) {
1019     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1020     PetscSequentialPhaseBegin(mat->comm,1);
1021     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
1022            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
1023             baij->cstart*bs,baij->cend*bs);
1024     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1025     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
1026     fflush(fd);
1027     PetscSequentialPhaseEnd(mat->comm,1);
1028   } else {
1029     int size = baij->size;
1030     rank = baij->rank;
1031     if (size == 1) {
1032       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1033     } else {
1034       /* assemble the entire matrix onto first processor. */
1035       Mat         A;
1036       Mat_SeqBAIJ *Aloc;
1037       int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
1038       int         mbs=baij->mbs;
1039       Scalar      *a;
1040 
1041       if (!rank) {
1042         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1043       } else {
1044         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1045       }
1046       PLogObjectParent(mat,A);
1047 
1048       /* copy over the A part */
1049       Aloc = (Mat_SeqBAIJ*) baij->A->data;
1050       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1051       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1052 
1053       for ( i=0; i<mbs; i++ ) {
1054         rvals[0] = bs*(baij->rstart + i);
1055         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1056         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1057           col = (baij->cstart+aj[j])*bs;
1058           for (k=0; k<bs; k++ ) {
1059             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1060             col++; a += bs;
1061           }
1062         }
1063       }
1064       /* copy over the B part */
1065       Aloc = (Mat_SeqBAIJ*) baij->B->data;
1066       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1067       for ( i=0; i<mbs; i++ ) {
1068         rvals[0] = bs*(baij->rstart + i);
1069         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1070         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1071           col = baij->garray[aj[j]]*bs;
1072           for (k=0; k<bs; k++ ) {
1073             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1074             col++; a += bs;
1075           }
1076         }
1077       }
1078       PetscFree(rvals);
1079       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1080       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1081       /*
1082          Everyone has to call to draw the matrix since the graphics waits are
1083          synchronized across all processors that share the Draw object
1084       */
1085       if (!rank || vtype == DRAW_VIEWER) {
1086         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1087       }
1088       ierr = MatDestroy(A); CHKERRQ(ierr);
1089     }
1090   }
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 
1095 
1096 #undef __FUNC__
1097 #define __FUNC__ "MatView_MPIBAIJ"
1098 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1099 {
1100   int         ierr;
1101   ViewerType  vtype;
1102 
1103   PetscFunctionBegin;
1104   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1105   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1106       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1107     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1108   } else if (vtype == BINARY_FILE_VIEWER) {
1109     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1110   } else {
1111     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1112   }
1113   PetscFunctionReturn(0);
1114 }
1115 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage associated with an MPIBAIJ matrix:
   the stash, the sequential diagonal (A) and off-diagonal (B) parts, the
   optional scatter/work structures, and finally the Mat header itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  /* the following members are allocated on demand, hence may be null */
  if (baij->colmap) PetscFree(baij->colmap);
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  /* hd and ht share one allocation (see MatCreateHashTable_MPIBAIJ_Private);
     freeing hd releases both */
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1144 
1145 #undef __FUNC__
1146 #define __FUNC__ "MatMult_MPIBAIJ"
1147 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1150   int         ierr, nt;
1151 
1152   PetscFunctionBegin;
1153   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1154   if (nt != a->n) {
1155     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1156   }
1157   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1158   if (nt != a->m) {
1159     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1160   }
1161   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1162   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1163   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1164   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1165   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNC__
1170 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1171 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1172 {
1173   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1174   int        ierr;
1175 
1176   PetscFunctionBegin;
1177   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1178   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1179   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1180   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNC__
1185 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1186 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1187 {
1188   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1189   int         ierr;
1190 
1191   PetscFunctionBegin;
1192   /* do nondiagonal part */
1193   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1194   /* send it on its way */
1195   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1196   /* do local part */
1197   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1198   /* receive remote parts: note this assumes the values are not actually */
1199   /* inserted in yy until the next line, which is true for my implementation*/
1200   /* but is not perhaps always true. */
1201   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   MatMultTransAdd_MPIBAIJ - Computes zz = yy + A^T * xx, overlapping the
   reverse scatter of B^T*xx with the local diagonal-block product.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part: lvec = B^T * xx */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way (added into the owners' zz) */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part: zz = yy + (diagonal block)^T * xx */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1225 
1226 /*
1227   This only works correctly for square matrices where the subblock A->A is the
1228    diagonal block
1229 */
1230 #undef __FUNC__
1231 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1232 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1233 {
1234   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1235   int         ierr;
1236 
1237   PetscFunctionBegin;
1238   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1239   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNC__
1244 #define __FUNC__ "MatScale_MPIBAIJ"
1245 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1246 {
1247   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1248   int         ierr;
1249 
1250   PetscFunctionBegin;
1251   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1252   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNC__
1257 #define __FUNC__ "MatGetSize_MPIBAIJ"
1258 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1259 {
1260   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1261 
1262   PetscFunctionBegin;
1263   if (m) *m = mat->M;
1264   if (n) *n = mat->N;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNC__
1269 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1270 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1271 {
1272   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1273 
1274   PetscFunctionBegin;
1275   *m = mat->m; *n = mat->n;
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNC__
1280 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1281 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1282 {
1283   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1284 
1285   PetscFunctionBegin;
1286   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1291 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1292 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally owned (point) row of the matrix,
   merging the pieces stored in the diagonal (A) and off-diagonal (B)
   sequential submatrices into a single list sorted by global column.

   Input Parameters:
     matin - the matrix
     row   - global point row; must be owned by this process

   Output Parameters:
     nz  - number of nonzeros in the row
     idx - global column indices (optional; may be null)
     v   - values (optional; may be null)

   Notes:
     Only one row may be "gotten" at a time; MatRestoreRow must be called
     before the next MatGetRow.  The merged output is built in the
     lazily-allocated rowvalues/rowindices work arrays, sized for the
     longest local row.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* longest combined block row across A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation: values first, then the index array */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request columns/values from A and B only as needed for idx/v */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;   /* first B entry whose global column is >= cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all A entries, then the remaining B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the value pass above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* no value pass: find imark here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A columns are local; shift by cstart*bs to make them global */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1374 
1375 #undef __FUNC__
1376 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1377 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1378 {
1379   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1380 
1381   PetscFunctionBegin;
1382   if (baij->getrowactive == PETSC_FALSE) {
1383     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1384   }
1385   baij->getrowactive = PETSC_FALSE;
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNC__
1390 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1391 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1392 {
1393   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1394 
1395   PetscFunctionBegin;
1396   *bs = baij->bs;
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNC__
1401 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1402 int MatZeroEntries_MPIBAIJ(Mat A)
1403 {
1404   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1405   int         ierr;
1406 
1407   PetscFunctionBegin;
1408   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1409   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNC__
1414 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1415 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1416 {
1417   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1418   Mat         A = a->A, B = a->B;
1419   int         ierr;
1420   double      isend[5], irecv[5];
1421 
1422   PetscFunctionBegin;
1423   info->block_size     = (double)a->bs;
1424   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1425   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
1426   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1427   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
1428   if (flag == MAT_LOCAL) {
1429     info->nz_used      = isend[0];
1430     info->nz_allocated = isend[1];
1431     info->nz_unneeded  = isend[2];
1432     info->memory       = isend[3];
1433     info->mallocs      = isend[4];
1434   } else if (flag == MAT_GLOBAL_MAX) {
1435     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_MAX,matin->comm);CHKERRQ(ierr);
1436     info->nz_used      = irecv[0];
1437     info->nz_allocated = irecv[1];
1438     info->nz_unneeded  = irecv[2];
1439     info->memory       = irecv[3];
1440     info->mallocs      = irecv[4];
1441   } else if (flag == MAT_GLOBAL_SUM) {
1442     ierr = MPI_Allreduce(isend,irecv,5,MPI_INT,MPI_SUM,matin->comm);CHKERRQ(ierr);
1443     info->nz_used      = irecv[0];
1444     info->nz_allocated = irecv[1];
1445     info->nz_unneeded  = irecv[2];
1446     info->memory       = irecv[3];
1447     info->mallocs      = irecv[4];
1448   }
1449   info->rows_global       = (double)a->M;
1450   info->columns_global    = (double)a->N;
1451   info->rows_local        = (double)a->m;
1452   info->columns_local     = (double)a->N;
1453   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1454   info->fill_ratio_needed = 0;
1455   info->factor_mallocs    = 0;
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNC__
1460 #define __FUNC__ "MatSetOption_MPIBAIJ"
1461 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1462 {
1463   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1464 
1465   PetscFunctionBegin;
1466   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1467       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1468       op == MAT_COLUMNS_UNSORTED ||
1469       op == MAT_COLUMNS_SORTED ||
1470       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1471       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1472         MatSetOption(a->A,op);
1473         MatSetOption(a->B,op);
1474   } else if (op == MAT_ROW_ORIENTED) {
1475         a->roworiented = 1;
1476         MatSetOption(a->A,op);
1477         MatSetOption(a->B,op);
1478   } else if (op == MAT_ROWS_SORTED ||
1479              op == MAT_ROWS_UNSORTED ||
1480              op == MAT_SYMMETRIC ||
1481              op == MAT_STRUCTURALLY_SYMMETRIC ||
1482              op == MAT_YES_NEW_DIAGONALS ||
1483              op == MAT_USE_HASH_TABLE)
1484     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1485   else if (op == MAT_COLUMN_ORIENTED) {
1486     a->roworiented = 0;
1487     MatSetOption(a->A,op);
1488     MatSetOption(a->B,op);
1489   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1490     a->donotstash = 1;
1491   } else if (op == MAT_NO_NEW_DIAGONALS) {
1492     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1493   } else if (op == MAT_USE_HASH_TABLE) {
1494     a->ht_flag = 1;
1495   } else {
1496     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1497   }
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNC__
1502 #define __FUNC__ "MatTranspose_MPIBAIJ("
1503 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1504 {
1505   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1506   Mat_SeqBAIJ *Aloc;
1507   Mat        B;
1508   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1509   int        bs=baij->bs,mbs=baij->mbs;
1510   Scalar     *a;
1511 
1512   PetscFunctionBegin;
1513   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1514   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1515   CHKERRQ(ierr);
1516 
1517   /* copy over the A part */
1518   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1519   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1520   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1521 
1522   for ( i=0; i<mbs; i++ ) {
1523     rvals[0] = bs*(baij->rstart + i);
1524     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1525     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1526       col = (baij->cstart+aj[j])*bs;
1527       for (k=0; k<bs; k++ ) {
1528         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1529         col++; a += bs;
1530       }
1531     }
1532   }
1533   /* copy over the B part */
1534   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1535   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1536   for ( i=0; i<mbs; i++ ) {
1537     rvals[0] = bs*(baij->rstart + i);
1538     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1539     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1540       col = baij->garray[aj[j]]*bs;
1541       for (k=0; k<bs; k++ ) {
1542         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1543         col++; a += bs;
1544       }
1545     }
1546   }
1547   PetscFree(rvals);
1548   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1549   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1550 
1551   if (matout != PETSC_NULL) {
1552     *matout = B;
1553   } else {
1554     PetscOps       *Abops;
1555     struct _MatOps *Aops;
1556 
1557     /* This isn't really an in-place transpose .... but free data structures from baij */
1558     PetscFree(baij->rowners);
1559     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1560     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1561     if (baij->colmap) PetscFree(baij->colmap);
1562     if (baij->garray) PetscFree(baij->garray);
1563     if (baij->lvec) VecDestroy(baij->lvec);
1564     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1565     PetscFree(baij);
1566 
1567     /*
1568        This is horrible, horrible code. We need to keep the
1569       A pointers for the bops and ops but copy everything
1570       else from C.
1571     */
1572     Abops = A->bops;
1573     Aops  = A->ops;
1574     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1575     A->bops = Abops;
1576     A->ops  = Aops;
1577 
1578     PetscHeaderDestroy(B);
1579   }
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 #undef __FUNC__
1584 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1585 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1586 {
1587   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1588   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1589   int ierr,s1,s2,s3;
1590 
1591   PetscFunctionBegin;
1592   if (ll)  {
1593     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1594     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1595     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1596     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1597     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1598   }
1599   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 #undef __FUNC__
1604 #define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the rows of the parallel matrix listed in
   is; if diag is non-null the diagonal entry of each zeroed row is set
   to *diag instead.

   The indices in is are global POINT (not block) row numbers and may
   name rows owned by any processor, so each index is first routed to
   its owning processor with MPI messages; the owners then zero the rows
   in both the diagonal (l->A) and off-diagonal (l->B) local blocks.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds BLOCK-row ownership offsets, so scale by bs to
       compare against the point-row index */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices by destination; this consumes starts[] ... */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* ... so the offsets are rebuilt before posting the sends */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  /* first point row owned by this processor */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base; /* convert to local point-row index */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* slow path: the local MatZeroRows() above could not place the
     diagonal entries, so insert them one at a time and re-assemble */
  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
1749 
1750 extern int MatPrintHelp_SeqBAIJ(Mat);
1751 #undef __FUNC__
1752 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1753 int MatPrintHelp_MPIBAIJ(Mat A)
1754 {
1755   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1756   MPI_Comm    comm = A->comm;
1757   static int  called = 0;
1758   int         ierr;
1759 
1760   PetscFunctionBegin;
1761   if (!a->rank) {
1762     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1763   }
1764   if (called) {PetscFunctionReturn(0);} else called = 1;
1765   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1766   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNC__
1771 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1772 int MatSetUnfactored_MPIBAIJ(Mat A)
1773 {
1774   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1775   int         ierr;
1776 
1777   PetscFunctionBegin;
1778   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1779   PetscFunctionReturn(0);
1780 }
1781 
1782 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
1783 
/* -------------------------------------------------------------------*/
/*
    Operations table for the MATMPIBAIJ format.  The initializers are
    POSITIONAL: each entry fills the corresponding slot of struct
    _MatOps in its declaration order, and a 0 marks an operation this
    format does not implement.  Do not reorder or insert entries.
*/
static struct _MatOps MatOps = {
  MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
  0,0,0,0,
  0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
  0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
  0,0,0,MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
  0,0,MatConvertSameType_MPIBAIJ,0,0,
  0,0,0,MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
  0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
1800 
1801 
1802 #undef __FUNC__
1803 #define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  /* uniprocessor case: a sequential BAIJ matrix suffices; there is no
     off-diagonal part, so o_nz/o_nnz are not consulted */
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* argument consistency checks: per-row preallocation requires known
     local sizes, and at least one of each (local, global) pair must be given */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both a local and a global size are given the local sizes win;
     the global size is recomputed below as the sum of the local sizes */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* local sizes not given: split the global block rows/cols evenly,
     lower ranks absorbing the remainder one block each */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* record the sizes in both the implementation struct and the Mat header */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  /* turn the gathered per-processor block counts into cumulative offsets
     (rowners[1] is already the offset past rank 0) */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;  /* ownership range in point rows */
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;  /* ownership range in point columns */
  b->cend_bs   = b->cend * bs;


  /* the diagonal block is m x n; the off-diagonal block is m x N since
     it may reference any non-local column */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* -mat_use_hash_table may carry an optional factor; values <= 1.0
     fall back to the default 1.39 */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  PetscFunctionReturn(0);
}
2040 
2041 #undef __FUNC__
2042 #define __FUNC__ "MatConvertSameType_MPIBAIJ"
2043 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
2044 {
2045   Mat         mat;
2046   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2047   int         ierr, len=0, flg;
2048 
2049   PetscFunctionBegin;
2050   *newmat       = 0;
2051   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
2052   PLogObjectCreate(mat);
2053   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2054   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
2055   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2056   mat->ops->view       = MatView_MPIBAIJ;
2057   mat->factor     = matin->factor;
2058   mat->assembled  = PETSC_TRUE;
2059 
2060   a->m = mat->m   = oldmat->m;
2061   a->n = mat->n   = oldmat->n;
2062   a->M = mat->M   = oldmat->M;
2063   a->N = mat->N   = oldmat->N;
2064 
2065   a->bs  = oldmat->bs;
2066   a->bs2 = oldmat->bs2;
2067   a->mbs = oldmat->mbs;
2068   a->nbs = oldmat->nbs;
2069   a->Mbs = oldmat->Mbs;
2070   a->Nbs = oldmat->Nbs;
2071 
2072   a->rstart       = oldmat->rstart;
2073   a->rend         = oldmat->rend;
2074   a->cstart       = oldmat->cstart;
2075   a->cend         = oldmat->cend;
2076   a->size         = oldmat->size;
2077   a->rank         = oldmat->rank;
2078   mat->insertmode = NOT_SET_VALUES;
2079   a->rowvalues    = 0;
2080   a->getrowactive = PETSC_FALSE;
2081   a->barray       = 0;
2082 
2083   /* hash table stuff */
2084   a->ht           = 0;
2085   a->hd           = 0;
2086   a->ht_size      = 0;
2087   a->ht_flag      = oldmat->ht_flag;
2088   a->ht_fact      = oldmat->ht_fact;
2089   a->ht_total_ct  = 0;
2090   a->ht_insert_ct = 0;
2091 
2092 
2093   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2094   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2095   a->cowners = a->rowners + a->size + 2;
2096   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2097   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
2098   if (oldmat->colmap) {
2099     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2100     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2101     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2102   } else a->colmap = 0;
2103   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2104     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2105     PLogObjectMemory(mat,len*sizeof(int));
2106     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2107   } else a->garray = 0;
2108 
2109   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2110   PLogObjectParent(mat,a->lvec);
2111   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2112   PLogObjectParent(mat,a->Mvctx);
2113   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
2114   PLogObjectParent(mat,a->A);
2115   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
2116   PLogObjectParent(mat,a->B);
2117   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2118   if (flg) {
2119     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2120   }
2121   *newmat = mat;
2122   PetscFunctionReturn(0);
2123 }
2124 
2125 #include "sys.h"
2126 
2127 #undef __FUNC__
2128 #define __FUNC__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a matrix stored in PETSc binary format into a
   parallel block AIJ matrix.  Process 0 reads the header, row lengths,
   column indices and values and ships each processor its share.  If the
   global size is not divisible by the block size (option
   -matload_block_size) the matrix is padded with extra rows carrying a
   unit diagonal.

   NOTE(review): the type argument is not consulted here -- presumably
   the caller's dispatch guarantees an MPIBAIJ target; confirm.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* only process 0 touches the file; header contents are broadcast */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  /* rowners[] = cumulative block-row offsets; browners[] = the same
     offsets in point rows */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    /* padded rows hold a single (diagonal) entry */
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* in the uniprocessor case the padded rows are not on disk; their
       column indices are fabricated here */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        /* tmp is the block column; rstart/rend are block offsets (the
           matrix is square) so this classifies diagonal vs off-diagonal */
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    /* padded rows carry a unit diagonal */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if ( size != 1 ){
      /* i == size-1 here, left over from the loop above */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2354 
2355 
2356 
2357 #undef __FUNC__
2358 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2359 /*@
2360    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2361 
2362    Input Parameters:
2363 .  mat  - the matrix
2364 .  fact - factor
2365 
2366    Collective on Mat
2367 
2368   Notes:
2369    This can also be set by the command line option: -mat_use_hash_table fact
2370 
2371 .keywords: matrix, hashtable, factor, HT
2372 
2373 .seealso: MatSetOption()
2374 @*/
2375 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2376 {
2377   Mat_MPIBAIJ *baij;
2378 
2379   PetscFunctionBegin;
2380   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2381   if (mat->type != MATMPIBAIJ) {
2382       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2383   }
2384   baij = (Mat_MPIBAIJ*) mat->data;
2385   baij->ht_fact = fact;
2386   PetscFunctionReturn(0);
2387 }
2388