xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6831982abb6453c2f3e25efecb5d0951d333371e)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.180 1999/10/13 20:37:30 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
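/*
   Illustrative sketch (not part of the original source): the store/retrieve
   pair above saves and later restores the numerical values of a matrix whose
   nonzero pattern is frozen, e.g. so the values can be reused across a
   sequence of related solves.
*/
#if 0
int ExampleStoreRetrieve(Mat mat)
{
  int ierr;

  PetscFunctionBegin;
  /* freeze the nonzero pattern so the stored copy remains valid */
  ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);     /* stash a copy of the values */
  /* ... overwrite the matrix values in place ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  /* restore the stashed values */
  PetscFunctionReturn(0);
}
#endif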
48 
49 /*
50      Local utility routine that creates a mapping from the global column
51    number to the local number in the off-diagonal part of the local
52    storage of the matrix.  This is done in a non-scalable way since the
53    length of colmap equals the number of block columns of the global matrix.
54 */
55 #undef __FUNC__
56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
57 static int CreateColmap_MPIBAIJ_Private(Mat mat)
58 {
59   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
60   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
61   int         nbs = B->nbs,i,bs=B->bs,ierr;
62 
63   PetscFunctionBegin;
64 #if defined (PETSC_USE_CTABLE)
65   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
66   for ( i=0; i<nbs; i++ ){
67     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
68   }
69 #else
70   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
71   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
72   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
73   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
74 #endif
75   PetscFunctionReturn(0);
76 }
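/*
   Illustrative sketch (not part of the original source; gcol, bcol and col are
   hypothetical locals): how the colmap built above is consulted.  colmap maps
   a global block column to 1 + the first point column of that block inside B's
   local storage; the +1 shift lets 0 mean "block not present".  (The
   PETSC_USE_CTABLE branch uses PetscTableFind() with the same 1-based
   convention.)
*/
#if 0
  bcol = gcol/bs;                           /* global block column         */
  col  = baij->colmap[bcol] - 1 + gcol%bs;  /* local point column within B */
  if (col < 0) {
    /* block bcol has no entries in the off-diagonal part */
  }
#endif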
77 
78 #define CHUNKSIZE  10
79 
80 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
81 { \
82  \
83     brow = row/bs;  \
84     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
85     rmax = aimax[brow]; nrow = ailen[brow]; \
86       bcol = col/bs; \
87       ridx = row % bs; cidx = col % bs; \
88       low = 0; high = nrow; \
89       while (high-low > 3) { \
90         t = (low+high)/2; \
91         if (rp[t] > bcol) high = t; \
92         else              low  = t; \
93       } \
94       for ( _i=low; _i<high; _i++ ) { \
95         if (rp[_i] > bcol) break; \
96         if (rp[_i] == bcol) { \
97           bap  = ap +  bs2*_i + bs*cidx + ridx; \
98           if (addv == ADD_VALUES) *bap += value;  \
99           else                    *bap  = value;  \
100           goto a_noinsert; \
101         } \
102       } \
103       if (a->nonew == 1) goto a_noinsert; \
104       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
105       if (nrow >= rmax) { \
106         /* there is no extra room in row, therefore enlarge */ \
107         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
108         Scalar *new_a; \
109  \
110         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
111  \
112         /* malloc new storage space */ \
113         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
114         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
115         new_j   = (int *) (new_a + bs2*new_nz); \
116         new_i   = new_j + new_nz; \
117  \
118         /* copy over old data into new slots */ \
119         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
120         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
121         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
122         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
123         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
124                           len*sizeof(int));CHKERRQ(ierr); \
125         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
126         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
127         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
128                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
129         /* free up old matrix storage */ \
130         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
131         if (!a->singlemalloc) { \
132           ierr = PetscFree(a->i);CHKERRQ(ierr); \
133           ierr = PetscFree(a->j);CHKERRQ(ierr);\
134         } \
135         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
136         a->singlemalloc = 1; \
137  \
138         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
139         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
140         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
141         a->maxnz += bs2*CHUNKSIZE; \
142         a->reallocs++; \
143         a->nz++; \
144       } \
145       N = nrow++ - 1;  \
146       /* shift up all the later entries in this row */ \
147       for ( ii=N; ii>=_i; ii-- ) { \
148         rp[ii+1] = rp[ii]; \
149         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
150       } \
151       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
152       rp[_i]                      = bcol;  \
153       ap[bs2*_i + bs*cidx + ridx] = value;  \
154       a_noinsert:; \
155     ailen[brow] = nrow; \
156 }
157 
158 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
159 { \
160  \
161     brow = row/bs;  \
162     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
163     rmax = bimax[brow]; nrow = bilen[brow]; \
164       bcol = col/bs; \
165       ridx = row % bs; cidx = col % bs; \
166       low = 0; high = nrow; \
167       while (high-low > 3) { \
168         t = (low+high)/2; \
169         if (rp[t] > bcol) high = t; \
170         else              low  = t; \
171       } \
172       for ( _i=low; _i<high; _i++ ) { \
173         if (rp[_i] > bcol) break; \
174         if (rp[_i] == bcol) { \
175           bap  = ap +  bs2*_i + bs*cidx + ridx; \
176           if (addv == ADD_VALUES) *bap += value;  \
177           else                    *bap  = value;  \
178           goto b_noinsert; \
179         } \
180       } \
181       if (b->nonew == 1) goto b_noinsert; \
182       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
183       if (nrow >= rmax) { \
184         /* there is no extra room in row, therefore enlarge */ \
185         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
186         Scalar *new_a; \
187  \
188         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
189  \
190         /* malloc new storage space */ \
191         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
192         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
193         new_j   = (int *) (new_a + bs2*new_nz); \
194         new_i   = new_j + new_nz; \
195  \
196         /* copy over old data into new slots */ \
197         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
198         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
199         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
200         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
201         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
202                            len*sizeof(int));CHKERRQ(ierr); \
203         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
204         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
205         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
206                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
207         /* free up old matrix storage */ \
208         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
209         if (!b->singlemalloc) { \
210           ierr = PetscFree(b->i);CHKERRQ(ierr); \
211           ierr = PetscFree(b->j);CHKERRQ(ierr); \
212         } \
213         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
214         b->singlemalloc = 1; \
215  \
216         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
217         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
218         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
219         b->maxnz += bs2*CHUNKSIZE; \
220         b->reallocs++; \
221         b->nz++; \
222       } \
223       N = nrow++ - 1;  \
224       /* shift up all the later entries in this row */ \
225       for ( ii=N; ii>=_i; ii-- ) { \
226         rp[ii+1] = rp[ii]; \
227         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
228       } \
229       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
230       rp[_i]                      = bcol;  \
231       ap[bs2*_i + bs*cidx + ridx] = value;  \
232       b_noinsert:; \
233     bilen[brow] = nrow; \
234 }
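/*
   Illustrative sketch (not part of the original source): the search both
   macros above perform on a block row.  The block-column indices rp[0..nrow)
   are sorted, so a binary search shrinks the window to at most 3 entries and
   a linear scan then finds the block, or the point where it must be inserted.
*/
#if 0
static int FindBlockColumn(int *rp,int nrow,int bcol)
{
  int low = 0, high = nrow, t, i;
  while (high - low > 3) {          /* narrow the sorted window */
    t = (low + high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for ( i=low; i<high; i++ ) {      /* finish with a linear scan */
    if (rp[i] == bcol) return i;    /* block column already present */
    if (rp[i] > bcol)  break;       /* i is the insertion point */
  }
  return -(i+1);                    /* negative encoding of the insertion point */
}
#endif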
235 
236 #undef __FUNC__
237 #define __FUNC__ "MatSetValues_MPIBAIJ"
238 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
239 {
240   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
241   Scalar      value;
242   int         ierr,i,j,row,col;
243   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
244   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
245   int         cend_orig=baij->cend_bs,bs=baij->bs;
246 
247   /* Some variables required by the macros below */
248   Mat         A = baij->A;
249   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
250   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
251   Scalar      *aa=a->a;
252 
253   Mat         B = baij->B;
254   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
255   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
256   Scalar      *ba=b->a;
257 
258   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
259   int         low,high,t,ridx,cidx,bs2=a->bs2;
260   Scalar      *ap,*bap;
261 
262   PetscFunctionBegin;
263   for ( i=0; i<m; i++ ) {
264     if (im[i] < 0) continue;
265 #if defined(PETSC_USE_BOPT_g)
266     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
267 #endif
268     if (im[i] >= rstart_orig && im[i] < rend_orig) {
269       row = im[i] - rstart_orig;
270       for ( j=0; j<n; j++ ) {
271         if (in[j] >= cstart_orig && in[j] < cend_orig){
272           col = in[j] - cstart_orig;
273           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
274           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
275           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
276         } else if (in[j] < 0) continue;
277 #if defined(PETSC_USE_BOPT_g)
278         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
279 #endif
280         else {
281           if (mat->was_assembled) {
282             if (!baij->colmap) {
283               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
284             }
285 #if defined (PETSC_USE_CTABLE)
286             ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
287             col  = col - 1 + in[j]%bs;
288 #else
289             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
290 #endif
291             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
292               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
293               col =  in[j];
294               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
295               B = baij->B;
296               b = (Mat_SeqBAIJ *) (B)->data;
297               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
298               ba=b->a;
299             }
300           } else col = in[j];
301           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
302           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
303           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
304         }
305       }
306     } else {
307       if ( !baij->donotstash) {
308         if (roworiented) {
309           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
310         } else {
311           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
312         }
313       }
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
321 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
322 {
323   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
324   Scalar      *value,*barray=baij->barray;
325   int         ierr,i,j,ii,jj,row,col;
326   int         roworiented = baij->roworiented,rstart=baij->rstart ;
327   int         rend=baij->rend,cstart=baij->cstart,stepval;
328   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
329 
330   PetscFunctionBegin;
331   if(!barray) {
332     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
333   }
334 
335   if (roworiented) {
336     stepval = (n-1)*bs;
337   } else {
338     stepval = (m-1)*bs;
339   }
340   for ( i=0; i<m; i++ ) {
341     if (im[i] < 0) continue;
342 #if defined(PETSC_USE_BOPT_g)
343     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
344 #endif
345     if (im[i] >= rstart && im[i] < rend) {
346       row = im[i] - rstart;
347       for ( j=0; j<n; j++ ) {
348         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous, so a copy is not required */
349         if ((roworiented) && (n == 1)) {
350           barray = v + i*bs2;
351         } else if((!roworiented) && (m == 1)) {
352           barray = v + j*bs2;
353         } else { /* Here a copy is required */
354           if (roworiented) {
355             value = v + i*(stepval+bs)*bs + j*bs;
356           } else {
357             value = v + j*(stepval+bs)*bs + i*bs;
358           }
359           for ( ii=0; ii<bs; ii++,value+=stepval ) {
360             for (jj=0; jj<bs; jj++ ) {
361               *barray++  = *value++;
362             }
363           }
364           barray -=bs2;
365         }
366 
367         if (in[j] >= cstart && in[j] < cend){
368           col  = in[j] - cstart;
369           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
370         }
371         else if (in[j] < 0) continue;
372 #if defined(PETSC_USE_BOPT_g)
373         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
374 #endif
375         else {
376           if (mat->was_assembled) {
377             if (!baij->colmap) {
378               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
379             }
380 
381 #if defined(PETSC_USE_BOPT_g)
382 #if defined (PETSC_USE_CTABLE)
383             { int data;
384               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
385               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
386             }
387 #else
388             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
389 #endif
390 #endif
391 #if defined (PETSC_USE_CTABLE)
392 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
393             col  = (col - 1)/bs;
394 #else
395             col = (baij->colmap[in[j]] - 1)/bs;
396 #endif
397             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
398               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
399               col =  in[j];
400             }
401           }
402           else col = in[j];
403           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
404         }
405       }
406     } else {
407       if (!baij->donotstash) {
408         if (roworiented) {
409           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
410         } else {
411           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         }
413       }
414     }
415   }
416   PetscFunctionReturn(0);
417 }
418 #define HASH_KEY 0.6180339887
419 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
420 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
421 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
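/*
   Illustrative sketch (not part of the original source): HASH() is
   multiplicative hashing with the golden-ratio constant; the integer part of
   key*HASH_KEY is discarded and the fractional part is scaled into [0,size).
   Keys are 1-based so that 0 can mark an empty slot in the table.
*/
#if 0
  double tmp;
  int    key = (row/bs)*Nbs + (col/bs) + 1;  /* block (row,col) -> 1-based key */
  int    h1  = HASH(size,key,tmp);           /* starting slot in [0,size)      */
#endif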
422 #undef __FUNC__
423 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
424 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
425 {
426   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
427   int         ierr,i,j,row,col;
428   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
429   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
430   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
431   double      tmp;
432   Scalar      ** HD = baij->hd,value;
433 #if defined(PETSC_USE_BOPT_g)
434   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
435 #endif
436 
437   PetscFunctionBegin;
438 
439   for ( i=0; i<m; i++ ) {
440 #if defined(PETSC_USE_BOPT_g)
441     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
442     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
443 #endif
444     row = im[i];
445     if (row >= rstart_orig && row < rend_orig) {
446       for ( j=0; j<n; j++ ) {
447         col = in[j];
448         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
449         /* Look up the block entry in the hash table */
450         key = (row/bs)*Nbs+(col/bs)+1;
451         h1  = HASH(size,key,tmp);
452 
453 
454         idx = h1;
455 #if defined(PETSC_USE_BOPT_g)
456         insert_ct++;
457         total_ct++;
458         if (HT[idx] != key) {
459           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
460           if (idx == size) {
461             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
462             if (idx == h1) {
463               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
464             }
465           }
466         }
467 #else
468         if (HT[idx] != key) {
469           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
470           if (idx == size) {
471             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
472             if (idx == h1) {
473               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
474             }
475           }
476         }
477 #endif
478         /* A HASH table entry is found, so insert the values at the correct address */
479         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
480         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
481       }
482     } else {
483       if (!baij->donotstash) {
484         if (roworiented) {
485           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
486         } else {
487           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
488         }
489       }
490     }
491   }
492 #if defined(PETSC_USE_BOPT_g)
493   baij->ht_total_ct = total_ct;
494   baij->ht_insert_ct = insert_ct;
495 #endif
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNC__
500 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
501 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
502 {
503   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
504   int         ierr,i,j,ii,jj,row,col;
505   int         roworiented = baij->roworiented,rstart=baij->rstart ;
506   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
507   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
508   double      tmp;
509   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
510 #if defined(PETSC_USE_BOPT_g)
511   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
512 #endif
513 
514   PetscFunctionBegin;
515 
516   if (roworiented) {
517     stepval = (n-1)*bs;
518   } else {
519     stepval = (m-1)*bs;
520   }
521   for ( i=0; i<m; i++ ) {
522 #if defined(PETSC_USE_BOPT_g)
523     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
524     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
525 #endif
526     row   = im[i];
527     v_t   = v + i*bs2;
528     if (row >= rstart && row < rend) {
529       for ( j=0; j<n; j++ ) {
530         col = in[j];
531 
532         /* Look up the block entry in the hash table */
533         key = row*Nbs+col+1;
534         h1  = HASH(size,key,tmp);
535 
536         idx = h1;
537 #if defined(PETSC_USE_BOPT_g)
538         total_ct++;
539         insert_ct++;
540         if (HT[idx] != key) {
541           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
542           if (idx == size) {
543             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
544             if (idx == h1) {
545               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
546             }
547           }
548         }
549 #else
550         if (HT[idx] != key) {
551           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
552           if (idx == size) {
553             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
554             if (idx == h1) {
555               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
556             }
557           }
558         }
559 #endif
560         baij_a = HD[idx];
561         if (roworiented) {
562           /*value = v + i*(stepval+bs)*bs + j*bs;*/
563           /* value = v + (i*(stepval+bs)+j)*bs; */
564           value = v_t;
565           v_t  += bs;
566           if (addv == ADD_VALUES) {
567             for ( ii=0; ii<bs; ii++,value+=stepval) {
568               for ( jj=ii; jj<bs2; jj+=bs ) {
569                 baij_a[jj]  += *value++;
570               }
571             }
572           } else {
573             for ( ii=0; ii<bs; ii++,value+=stepval) {
574               for ( jj=ii; jj<bs2; jj+=bs ) {
575                 baij_a[jj]  = *value++;
576               }
577             }
578           }
579         } else {
580           value = v + j*(stepval+bs)*bs + i*bs;
581           if (addv == ADD_VALUES) {
582             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
583               for ( jj=0; jj<bs; jj++ ) {
584                 baij_a[jj]  += *value++;
585               }
586             }
587           } else {
588             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
589               for ( jj=0; jj<bs; jj++ ) {
590                 baij_a[jj]  = *value++;
591               }
592             }
593           }
594         }
595       }
596     } else {
597       if (!baij->donotstash) {
598         if (roworiented) {
599           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
600         } else {
601           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
602         }
603       }
604     }
605   }
606 #if defined(PETSC_USE_BOPT_g)
607   baij->ht_total_ct = total_ct;
608   baij->ht_insert_ct = insert_ct;
609 #endif
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNC__
614 #define __FUNC__ "MatGetValues_MPIBAIJ"
615 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
616 {
617   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
618   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
619   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
620 
621   PetscFunctionBegin;
622   for ( i=0; i<m; i++ ) {
623     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
624     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
625     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
626       row = idxm[i] - bsrstart;
627       for ( j=0; j<n; j++ ) {
628         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
629         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
630         if (idxn[j] >= bscstart && idxn[j] < bscend){
631           col = idxn[j] - bscstart;
632           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633         } else {
634           if (!baij->colmap) {
635             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
636           }
637 #if defined (PETSC_USE_CTABLE)
638           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
639           data --;
640 #else
641           data = baij->colmap[idxn[j]/bs]-1;
642 #endif
643           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
644           else {
645             col  = data + idxn[j]%bs;
646             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647           }
648         }
649       }
650     } else {
651       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
652     }
653   }
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNC__
658 #define __FUNC__ "MatNorm_MPIBAIJ"
659 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
660 {
661   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
662   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
663   int        ierr, i,bs2=baij->bs2;
664   double     sum = 0.0;
665   Scalar     *v;
666 
667   PetscFunctionBegin;
668   if (baij->size == 1) {
669     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
670   } else {
671     if (type == NORM_FROBENIUS) {
672       v = amat->a;
673       for (i=0; i<amat->nz*bs2; i++ ) {
674 #if defined(PETSC_USE_COMPLEX)
675         sum += PetscReal(PetscConj(*v)*(*v)); v++;
676 #else
677         sum += (*v)*(*v); v++;
678 #endif
679       }
680       v = bmat->a;
681       for (i=0; i<bmat->nz*bs2; i++ ) {
682 #if defined(PETSC_USE_COMPLEX)
683         sum += PetscReal(PetscConj(*v)*(*v)); v++;
684 #else
685         sum += (*v)*(*v); v++;
686 #endif
687       }
688       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
689       *norm = sqrt(*norm);
690     } else {
691       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
692     }
693   }
694   PetscFunctionReturn(0);
695 }
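/*
   The Frobenius norm computed above is
       ||A||_F = sqrt( sum_ij |a_ij|^2 )
   accumulated as a local sum of squares over the diagonal (A) and
   off-diagonal (B) blocks, reduced with MPI_Allreduce, then rooted.
*/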
696 
697 
698 /*
699   Creates and populates the hash table.
700   This table is created only once.
701   If new entries need to be added to the matrix
702   then the hash table has to be destroyed and
703   recreated.
704 */
705 #undef __FUNC__
706 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
707 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
708 {
709   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
710   Mat         A = baij->A, B=baij->B;
711   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
712   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
713   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
714   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
715   int         *HT,key;
716   Scalar      **HD;
717   double      tmp;
718 #if defined(PETSC_USE_BOPT_g)
719   int         ct=0,max=0;
720 #endif
721 
722   PetscFunctionBegin;
723   baij->ht_size=(int)(factor*nz);
724   size = baij->ht_size;
725 
726   if (baij->ht) {
727     PetscFunctionReturn(0);
728   }
729 
730   /* Allocate Memory for Hash Table */
731   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
732   baij->ht = (int*)(baij->hd + size);
733   HD = baij->hd;
734   HT = baij->ht;
735 
736 
737   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
738 
739 
740   /* Loop Over A */
741   for ( i=0; i<a->mbs; i++ ) {
742     for ( j=ai[i]; j<ai[i+1]; j++ ) {
743       row = i+rstart;
744       col = aj[j]+cstart;
745 
746       key = row*Nbs + col + 1;
747       h1  = HASH(size,key,tmp);
748       for ( k=0; k<size; k++ ){
749         if (!HT[(h1+k)%size]) {
750           HT[(h1+k)%size] = key;
751           HD[(h1+k)%size] = a->a + j*bs2;
752           break;
753 #if defined(PETSC_USE_BOPT_g)
754         } else {
755           ct++;
756 #endif
757         }
758       }
759 #if defined(PETSC_USE_BOPT_g)
760       if (k> max) max = k;
761 #endif
762     }
763   }
764   /* Loop Over B */
765   for ( i=0; i<b->mbs; i++ ) {
766     for ( j=bi[i]; j<bi[i+1]; j++ ) {
767       row = i+rstart;
768       col = garray[bj[j]];
769       key = row*Nbs + col + 1;
770       h1  = HASH(size,key,tmp);
771       for ( k=0; k<size; k++ ){
772         if (!HT[(h1+k)%size]) {
773           HT[(h1+k)%size] = key;
774           HD[(h1+k)%size] = b->a + j*bs2;
775           break;
776 #if defined(PETSC_USE_BOPT_g)
777         } else {
778           ct++;
779 #endif
780         }
781       }
782 #if defined(PETSC_USE_BOPT_g)
783       if (k> max) max = k;
784 #endif
785     }
786   }
787 
788   /* Print Summary */
789 #if defined(PETSC_USE_BOPT_g)
790   for ( i=0,j=0; i<size; i++)
791     if (HT[i]) {j++;}
792   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
793            (j== 0)? 0.0:((double)(ct+j))/j,max);
794 #endif
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNC__
799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
801 {
802   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
803   int         ierr,nstash,reallocs;
804   InsertMode  addv;
805 
806   PetscFunctionBegin;
807   if (baij->donotstash) {
808     PetscFunctionReturn(0);
809   }
810 
811   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
812   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
813   if (addv == (ADD_VALUES|INSERT_VALUES)) {
814     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
815   }
816   mat->insertmode = addv; /* in case this processor had no cache */
817 
818   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
819   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
823   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
824   PetscFunctionReturn(0);
825 }
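/*
   Illustrative sketch (not part of the original source): the caller-side
   pattern that drives the stash machinery above.  Entries destined for other
   processes are stashed by MatSetValues() and communicated by the two
   assembly calls.
*/
#if 0
  ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* may stash off-process entries */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  /* start communicating the stash */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    /* receive and insert stashed values */
#endif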
826 
827 #undef __FUNC__
828 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
829 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
830 {
831   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
832   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
833   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
834   int         *row,*col,other_disassembled,r1,r2,r3;
835   Scalar      *val;
836   InsertMode  addv = mat->insertmode;
837 
838   PetscFunctionBegin;
839   if (!baij->donotstash) {
840     while (1) {
841       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
842       if (!flg) break;
843 
844       for ( i=0; i<n; ) {
845         /* Now identify the consecutive vals belonging to the same row */
846         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
847         if (j < n) ncols = j-i;
848         else       ncols = n-i;
849         /* Now assemble all these values with a single function call */
850         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
851         i = j;
852       }
853     }
854     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
855     /* Now process the block-stash. Since the values are stashed column-oriented,
856        set the roworiented flag to column oriented, and after MatSetValues()
857        restore the original flags */
858     r1 = baij->roworiented;
859     r2 = a->roworiented;
860     r3 = b->roworiented;
861     baij->roworiented = 0;
862     a->roworiented    = 0;
863     b->roworiented    = 0;
864     while (1) {
865       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
866       if (!flg) break;
867 
868       for ( i=0; i<n; ) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
871         if (j < n) ncols = j-i;
872         else       ncols = n-i;
873         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
874         i = j;
875       }
876     }
877     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
878     baij->roworiented = r1;
879     a->roworiented    = r2;
880     b->roworiented    = r3;
881   }
882 
883   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
884   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
885 
886   /* determine if any processor has disassembled, if so we must
887      also disassemble ourselves, in order that we may reassemble. */
888   /*
889      if nonzero structure of submatrix B cannot change then we know that
890      no processor disassembled thus we can skip this stuff
891   */
892   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
893     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
894     if (mat->was_assembled && !other_disassembled) {
895       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
896     }
897   }
898 
899   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
900     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
901   }
902   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
903   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
904 
905 #if defined(PETSC_USE_BOPT_g)
906   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
907     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
908              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
909     baij->ht_total_ct  = 0;
910     baij->ht_insert_ct = 0;
911   }
912 #endif
913   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
914     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
915     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
916     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
917   }
918 
919   if (baij->rowvalues) {
920     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
921     baij->rowvalues = 0;
922   }
923   PetscFunctionReturn(0);
924 }
925 
926 /*
927 #undef __FUNC__
928 #define __FUNC__ "MatView_MPIBAIJ_Binary"
929 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
930 {
931   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
932   int          ierr;
933 
934   PetscFunctionBegin;
935   if (baij->size == 1) {
936     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
937   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
938   PetscFunctionReturn(0);
939 }
940 */
941 
942 #undef __FUNC__
943 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
944 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
945 {
946   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
947   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
948   PetscTruth   isascii,isdraw;
949 
950   PetscFunctionBegin;
951   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
952   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
953   if (isascii) {
954     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
955     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
956       MatInfo info;
957       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
958       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
960               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
961               baij->bs,(int)info.memory);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
964       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
965       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
966       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
967       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
968       PetscFunctionReturn(0);
969     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
970       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
971       PetscFunctionReturn(0);
972     }
973   }
974 
975   if (isdraw) {
976     Draw       draw;
977     PetscTruth isnull;
978     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
979     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
980   }
981 
982   if (size == 1) {
983     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
984   } else {
985     /* assemble the entire matrix onto first processor. */
986     Mat         A;
987     Mat_SeqBAIJ *Aloc;
988     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
989     int         mbs = baij->mbs;
990     Scalar      *a;
991 
992     if (!rank) {
993       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
994     } else {
995       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
996     }
997     PLogObjectParent(mat,A);
998 
999     /* copy over the A part */
1000     Aloc  = (Mat_SeqBAIJ*) baij->A->data;
1001     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1002     rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1003 
1004     for ( i=0; i<mbs; i++ ) {
1005       rvals[0] = bs*(baij->rstart + i);
1006       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1007       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1008         col = (baij->cstart+aj[j])*bs;
1009         for (k=0; k<bs; k++ ) {
1010           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1011           col++; a += bs;
1012         }
1013       }
1014     }
1015     /* copy over the B part */
1016     Aloc = (Mat_SeqBAIJ*) baij->B->data;
1017     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1018     for ( i=0; i<mbs; i++ ) {
1019       rvals[0] = bs*(baij->rstart + i);
1020       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1021       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1022         col = baij->garray[aj[j]]*bs;
1023         for (k=0; k<bs; k++ ) {
1024           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1025           col++; a += bs;
1026         }
1027       }
1028     }
1029     ierr = PetscFree(rvals);CHKERRQ(ierr);
1030     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1031     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1032     /*
1033        Everyone has to call to draw the matrix since the graphics waits are
1034        synchronized across all processors that share the Draw object
1035     */
1036     if (!rank) {
1037       Viewer sviewer;
1038       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1039       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1040       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1041     }
1042     ierr = MatDestroy(A);CHKERRQ(ierr);
1043   }
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 
1048 
1049 #undef __FUNC__
1050 #define __FUNC__ "MatView_MPIBAIJ"
1051 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1052 {
1053   int        ierr;
1054   PetscTruth isascii,isdraw,issocket,isbinary;
1055 
1056   PetscFunctionBegin;
1057   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1059   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1060   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1061   if (isascii || isdraw || issocket || isbinary) {
1062     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1063   } else {
1064     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNC__
1070 #define __FUNC__ "MatDestroy_MPIBAIJ"
1071 int MatDestroy_MPIBAIJ(Mat mat)
1072 {
1073   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1074   int         ierr;
1075 
1076   PetscFunctionBegin;
1077 
1078   if (mat->mapping) {
1079     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1080   }
1081   if (mat->bmapping) {
1082     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1083   }
1084   if (mat->rmap) {
1085     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1086   }
1087   if (mat->cmap) {
1088     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1089   }
1090 #if defined(PETSC_USE_LOG)
1091   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1092 #endif
1093 
1094   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1095   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1096 
1097   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1098   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1099   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1100 #if defined (PETSC_USE_CTABLE)
1101   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1102 #else
1103   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1104 #endif
1105   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1106   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1107   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1108   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1109   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1110   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1111   ierr = PetscFree(baij);CHKERRQ(ierr);
1112   PLogObjectDestroy(mat);
1113   PetscHeaderDestroy(mat);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNC__
1118 #define __FUNC__ "MatMult_MPIBAIJ"
1119 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1120 {
1121   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1122   int         ierr, nt;
1123 
1124   PetscFunctionBegin;
1125   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1126   if (nt != a->n) {
1127     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1128   }
1129   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1130   if (nt != a->m) {
1131     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
1132   }
1133   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1134   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1135   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1136   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1137   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
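/*
   The multiply above computes y = A_d*x_local + B_o*x_ghost, where A_d is the
   diagonal block and B_o the off-diagonal block: the scatter filling the
   ghost vector lvec is started, the local product overlaps the communication,
   the off-diagonal contribution is added once the scatter completes, and the
   receives for the next scatter are posted.
*/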
1140 
1141 #undef __FUNC__
1142 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1143 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1144 {
1145   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1146   int        ierr;
1147 
1148   PetscFunctionBegin;
1149   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1150   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1151   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1152   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNC__
1157 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1158 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1159 {
1160   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1161   int         ierr;
1162 
1163   PetscFunctionBegin;
1164   /* do nondiagonal part */
1165   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1166   /* send it on its way */
1167   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1168   /* do local part */
1169   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1170   /* receive remote parts: note this assumes the values are not actually */
1171   /* inserted in yy until the next line, which is true for this implementation */
1172   /* but may not always be true. */
1173   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNC__
1178 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1179 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1180 {
1181   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1182   int         ierr;
1183 
1184   PetscFunctionBegin;
1185   /* do nondiagonal part */
1186   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1187   /* send it on its way */
1188   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1189   /* do local part */
1190   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1191   /* receive remote parts: note this assumes the values are not actually */
1192   /* inserted in zz until the next line, which is true for this implementation */
1193   /* but may not always be true. */
1194   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 /*
1199   This only works correctly for square matrices where the subblock A->A is the
1200    diagonal block
1201 */
1202 #undef __FUNC__
1203 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1204 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1205 {
1206   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1207   int         ierr;
1208 
1209   PetscFunctionBegin;
1210   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1211   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 #undef __FUNC__
1216 #define __FUNC__ "MatScale_MPIBAIJ"
1217 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1218 {
1219   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1220   int         ierr;
1221 
1222   PetscFunctionBegin;
1223   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1224   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNC__
1229 #define __FUNC__ "MatGetSize_MPIBAIJ"
1230 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1231 {
1232   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1233 
1234   PetscFunctionBegin;
1235   if (m) *m = mat->M;
1236   if (n) *n = mat->N;
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNC__
1241 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1242 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1243 {
1244   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1245 
1246   PetscFunctionBegin;
1247   *m = mat->m; *n = mat->n;
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNC__
1252 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1253 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1254 {
1255   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1256 
1257   PetscFunctionBegin;
1258   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1259   PetscFunctionReturn(0);
1260 }
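/*
   Illustrative sketch (not part of the original source): the ownership range
   returned above is in point (not block) indices, i.e. [rstart*bs,rend*bs).
*/
#if 0
  int rstart,rend,row;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for ( row=rstart; row<rend; row++ ) {
    /* rows owned by this process; row/bs is the corresponding block row */
  }
#endif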
1261 
1262 #undef __FUNC__
1263 #define __FUNC__ "MatGetRow_MPIBAIJ"
1264 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1265 {
1266   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1267   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1268   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1269   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1270   int        *cmap, *idx_p,cstart = mat->cstart;
1271 
1272   PetscFunctionBegin;
1273   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1274   mat->getrowactive = PETSC_TRUE;
1275 
1276   if (!mat->rowvalues && (idx || v)) {
1277     /*
1278         allocate enough space to hold information from the longest row.
1279     */
1280     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1281     int     max = 1,mbs = mat->mbs,tmp;
1282     for ( i=0; i<mbs; i++ ) {
1283       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1284       if (max < tmp) { max = tmp; }
1285     }
1286     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1287     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1288   }
1289 
1290   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows");
1291   lrow = row - brstart;
1292 
1293   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1294   if (!v)   {pvA = 0; pvB = 0;}
1295   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1296   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1297   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1298   nztot = nzA + nzB;
1299 
1300   cmap  = mat->garray;
1301   if (v  || idx) {
1302     if (nztot) {
1303       /* Sort by increasing column numbers, assuming A and B already sorted */
1304       int imark = -1;
1305       if (v) {
1306         *v = v_p = mat->rowvalues;
1307         for ( i=0; i<nzB; i++ ) {
1308           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1309           else break;
1310         }
1311         imark = i;
1312         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1313         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1314       }
1315       if (idx) {
1316         *idx = idx_p = mat->rowindices;
1317         if (imark > -1) {
1318           for ( i=0; i<imark; i++ ) {
1319             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1320           }
1321         } else {
1322           for ( i=0; i<nzB; i++ ) {
1323             if (cmap[cworkB[i]/bs] < cstart)
1324               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1325             else break;
1326           }
1327           imark = i;
1328         }
1329         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1330         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1331       }
1332     } else {
1333       if (idx) *idx = 0;
1334       if (v)   *v   = 0;
1335     }
1336   }
1337   *nz = nztot;
1338   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1339   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNC__
1344 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1345 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1346 {
1347   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1348 
1349   PetscFunctionBegin;
1350   if (baij->getrowactive == PETSC_FALSE) {
1351     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1352   }
1353   baij->getrowactive = PETSC_FALSE;
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNC__
1358 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1359 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1360 {
1361   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1362 
1363   PetscFunctionBegin;
1364   *bs = baij->bs;
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNC__
1369 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1370 int MatZeroEntries_MPIBAIJ(Mat A)
1371 {
1372   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1373   int         ierr;
1374 
1375   PetscFunctionBegin;
1376   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1377   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNC__
1382 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1383 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1384 {
1385   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1386   Mat         A = a->A, B = a->B;
1387   int         ierr;
1388   double      isend[5], irecv[5];
1389 
1390   PetscFunctionBegin;
1391   info->block_size     = (double)a->bs;
1392   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1393   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1394   isend[3] = info->memory;  isend[4] = info->mallocs;
1395   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1396   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1397   isend[3] += info->memory;  isend[4] += info->mallocs;
1398   if (flag == MAT_LOCAL) {
1399     info->nz_used      = isend[0];
1400     info->nz_allocated = isend[1];
1401     info->nz_unneeded  = isend[2];
1402     info->memory       = isend[3];
1403     info->mallocs      = isend[4];
1404   } else if (flag == MAT_GLOBAL_MAX) {
1405     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1406     info->nz_used      = irecv[0];
1407     info->nz_allocated = irecv[1];
1408     info->nz_unneeded  = irecv[2];
1409     info->memory       = irecv[3];
1410     info->mallocs      = irecv[4];
1411   } else if (flag == MAT_GLOBAL_SUM) {
1412     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1413     info->nz_used      = irecv[0];
1414     info->nz_allocated = irecv[1];
1415     info->nz_unneeded  = irecv[2];
1416     info->memory       = irecv[3];
1417     info->mallocs      = irecv[4];
1418   } else {
1419     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1420   }
1421   info->rows_global       = (double)a->M;
1422   info->columns_global    = (double)a->N;
1423   info->rows_local        = (double)a->m;
1424   info->columns_local     = (double)a->N;
1425   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1426   info->fill_ratio_needed = 0;
1427   info->factor_mallocs    = 0;
1428   PetscFunctionReturn(0);
1429 }
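/*
   Illustrative sketch (not part of the original source): querying the
   statistics assembled above.  MAT_LOCAL reports this process only, while
   MAT_GLOBAL_MAX and MAT_GLOBAL_SUM reduce over the communicator.
*/
#if 0
  MatInfo info;
  ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(mat->comm,"nz used %g, mallocs %g\n",info.nz_used,info.mallocs);CHKERRQ(ierr);
#endif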
1430 
1431 #undef __FUNC__
1432 #define __FUNC__ "MatSetOption_MPIBAIJ"
1433 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1434 {
1435   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1436   int         ierr;
1437 
1438   PetscFunctionBegin;
1439   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1440       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1441       op == MAT_COLUMNS_UNSORTED ||
1442       op == MAT_COLUMNS_SORTED ||
1443       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1444       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1445         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1446         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1447   } else if (op == MAT_ROW_ORIENTED) {
1448         a->roworiented = 1;
1449         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1450         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1451   } else if (op == MAT_ROWS_SORTED ||
1452              op == MAT_ROWS_UNSORTED ||
1453              op == MAT_SYMMETRIC ||
1454              op == MAT_STRUCTURALLY_SYMMETRIC ||
1455              op == MAT_YES_NEW_DIAGONALS) {
1457     PLogInfo(A,"MatSetOption_MPIBAIJ:Option ignored\n");
1458   } else if (op == MAT_COLUMN_ORIENTED) {
1459     a->roworiented = 0;
1460     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1461     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1462   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1463     a->donotstash = 1;
1464   } else if (op == MAT_NO_NEW_DIAGONALS) {
1465     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1466   } else if (op == MAT_USE_HASH_TABLE) {
1467     a->ht_flag = 1;
1468   } else {
1469     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1470   }
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNC__
1475 #define __FUNC__ "MatTranspose_MPIBAIJ"
1476 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1477 {
1478   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1479   Mat_SeqBAIJ *Aloc;
1480   Mat        B;
1481   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1482   int        bs=baij->bs,mbs=baij->mbs;
1483   Scalar     *a;
1484 
1485   PetscFunctionBegin;
1486   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1487   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1489 
1490   /* copy over the A part */
1491   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1492   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1493   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1494 
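  /* insert the blocks of A into B transposed: each MatSetValues() call fills
     one point row of B (a global point column col of A) at the bs point
     columns in rvals[], which are the global point rows of block row i */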
1495   for ( i=0; i<mbs; i++ ) {
1496     rvals[0] = bs*(baij->rstart + i);
1497     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1498     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1499       col = (baij->cstart+aj[j])*bs;
1500       for (k=0; k<bs; k++ ) {
1501         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1502         col++; a += bs;
1503       }
1504     }
1505   }
1506   /* copy over the B part */
1507   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1508   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1509   for ( i=0; i<mbs; i++ ) {
1510     rvals[0] = bs*(baij->rstart + i);
1511     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1512     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1513       col = baij->garray[aj[j]]*bs;
1514       for (k=0; k<bs; k++ ) {
1515         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1516         col++; a += bs;
1517       }
1518     }
1519   }
1520   ierr = PetscFree(rvals);CHKERRQ(ierr);
1521   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1522   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1523 
1524   if (matout != PETSC_NULL) {
1525     *matout = B;
1526   } else {
1527     PetscOps *Abops;
1528     MatOps   Aops;
1529 
1530     /* This isn't really an in-place transpose, but we free the data structures from baij */
1531     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
1532     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1533     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1534 #if defined (PETSC_USE_CTABLE)
1535     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1536 #else
1537     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1538 #endif
1539     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1540     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1541     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1542     ierr = PetscFree(baij);CHKERRQ(ierr);
1543 
1544     /*
1545        This is horrible, horrible code. We need to keep A's
1546       pointers for the bops and ops but copy everything
1547       else from B.
1548     */
1549     Abops   = A->bops;
1550     Aops    = A->ops;
1551     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1552     A->bops = Abops;
1553     A->ops  = Aops;
1554 
1555     PetscHeaderDestroy(B);
1556   }
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #undef __FUNC__
1561 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1562 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1563 {
1564   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1565   Mat         a = baij->A, b = baij->B;
1566   int         ierr,s1,s2,s3;
1567 
1568   PetscFunctionBegin;
1569   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1570   if (rr) {
1571     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1572     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1573     /* Overlap communication with computation. */
1574     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1575   }
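  /* while the scatter of rr is in flight, the left scaling of the
     off-diagonal block and the scaling of the diagonal block proceed;
     the scatter is completed below before B is right scaled */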
1576   if (ll) {
1577     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1578     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1579     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1580   }
1581   /* scale the diagonal block */
1582   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1583 
1584   if (rr) {
1585     /* Do a scatter end and then right scale the off-diagonal block */
1586     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1587     ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
1588   }
1589 
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNC__
1594 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1595 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1596 {
1597   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1598   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1599   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1600   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1601   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1602   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1603   MPI_Comm       comm = A->comm;
1604   MPI_Request    *send_waits,*recv_waits;
1605   MPI_Status     recv_status,*send_status;
1606   IS             istmp;
1607 
1608   PetscFunctionBegin;
1609   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1610   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1611 
1612   /*  first count number of contributors to each processor */
1613   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1614   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1615   procs  = nprocs + size;
1616   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1617   for ( i=0; i<N; i++ ) {
1618     idx   = rows[i];
1619     found = 0;
1620     for ( j=0; j<size; j++ ) {
1621       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1622         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1623       }
1624     }
1625     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1626   }
1627   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1628 
1629   /* inform other processors of number of messages and max length*/
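  /* nprocs[] holds the per-destination row counts and procs[] (the second
     half of the same allocation) holds 0/1 send flags; PetscMaxSum_Op reduces
     the first half with max and the second half with sum, so work[rank] is
     the longest incoming message and work[size+rank] the number of senders */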
1630   work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
1631   ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1632   nmax   = work[rank];
1633   nrecvs = work[size+rank];
1634   ierr = PetscFree(work);CHKERRQ(ierr);
1635 
1636   /* post receives:   */
1637   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1638   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1639   for ( i=0; i<nrecvs; i++ ) {
1640     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1641   }
1642 
1643   /* do sends:
1644      1) starts[i] gives the starting index in svalues for stuff going to
1645      the ith processor
1646   */
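  /* e.g. with size=3 and nprocs = {2,0,1} the prefix sum gives
     starts = {0,2,2}: the two rows for processor 0 occupy svalues[0..1]
     and the single row for processor 2 goes in svalues[2] */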
1647   svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1648   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1649   starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
1650   starts[0]  = 0;
1651   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1652   for ( i=0; i<N; i++ ) {
1653     svalues[starts[owner[i]]++] = rows[i];
1654   }
1655   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1656 
1657   starts[0] = 0;
1658   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1659   count = 0;
1660   for ( i=0; i<size; i++ ) {
1661     if (procs[i]) {
1662       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1663     }
1664   }
1665   ierr = PetscFree(starts);CHKERRQ(ierr);
1666 
1667   base = owners[rank]*bs;
1668 
1669   /*  wait on receives */
1670   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
1671   source = lens + nrecvs;
1672   count  = nrecvs; slen = 0;
1673   while (count) {
1674     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1675     /* unpack receives into our local space */
1676     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1677     source[imdex]  = recv_status.MPI_SOURCE;
1678     lens[imdex]    = n;
1679     slen          += n;
1680     count--;
1681   }
1682   ierr = PetscFree(recv_waits); CHKERRQ(ierr);
1683 
1684   /* move the data into the send scatter */
1685   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
1686   count = 0;
1687   for ( i=0; i<nrecvs; i++ ) {
1688     values = rvalues + i*nmax;
1689     for ( j=0; j<lens[i]; j++ ) {
1690       lrows[count++] = values[j] - base;
1691     }
1692   }
1693   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1694   ierr = PetscFree(lens);CHKERRQ(ierr);
1695   ierr = PetscFree(owner);CHKERRQ(ierr);
1696   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1697 
1698   /* actually zap the local rows */
1699   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1700   PLogObjectParent(A,istmp);
1701 
1702   /*
1703         Zero the required rows. If the "diagonal block" of the matrix
1704      is square and the user wishes to set the diagonal, we use separate
1705      code so that MatSetValues() is not called for each diagonal entry,
1706      which would allocate new memory on each call and slow things down.
1707 
1708        Contributed by: Mathew Knepley
1709   */
1710   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1711   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
1712   if (diag && (l->A->M == l->A->N)) {
1713     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
1714   } else if (diag) {
1715     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1716     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1717       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1718 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1719     }
1720     for ( i = 0; i < slen; i++ ) {
1721       row  = lrows[i] + rstart_bs;
1722       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1723     }
1724     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1726   } else {
1727     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1728   }
1729 
1730   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1731   ierr = PetscFree(lrows);CHKERRQ(ierr);
1732 
1733   /* wait on sends */
1734   if (nsends) {
1735     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1736     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1737     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1738   }
1739   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1740   ierr = PetscFree(svalues);CHKERRQ(ierr);
1741 
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 #undef __FUNC__
1746 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1747 int MatPrintHelp_MPIBAIJ(Mat A)
1748 {
1749   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1750   MPI_Comm    comm = A->comm;
1751   static int  called = 0;
1752   int         ierr;
1753 
1754   PetscFunctionBegin;
1755   if (!a->rank) {
1756     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1757   }
1758   if (called) {PetscFunctionReturn(0);} else called = 1;
1759   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1760   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNC__
1765 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1766 int MatSetUnfactored_MPIBAIJ(Mat A)
1767 {
1768   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1769   int         ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1777 
1778 #undef __FUNC__
1779 #define __FUNC__ "MatEqual_MPIBAIJ"
1780 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1781 {
1782   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1783   Mat         a, b, c, d;
1784   PetscTruth  flg;
1785   int         ierr;
1786 
1787   PetscFunctionBegin;
1788   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1789   a = matA->A; b = matA->B;
1790   c = matB->A; d = matB->B;
1791 
1792   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1793   if (flg == PETSC_TRUE) {
1794     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1795   }
1796   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 /* -------------------------------------------------------------------*/
1801 static struct _MatOps MatOps_Values = {
1802   MatSetValues_MPIBAIJ,
1803   MatGetRow_MPIBAIJ,
1804   MatRestoreRow_MPIBAIJ,
1805   MatMult_MPIBAIJ,
1806   MatMultAdd_MPIBAIJ,
1807   MatMultTrans_MPIBAIJ,
1808   MatMultTransAdd_MPIBAIJ,
1809   0,
1810   0,
1811   0,
1812   0,
1813   0,
1814   0,
1815   0,
1816   MatTranspose_MPIBAIJ,
1817   MatGetInfo_MPIBAIJ,
1818   MatEqual_MPIBAIJ,
1819   MatGetDiagonal_MPIBAIJ,
1820   MatDiagonalScale_MPIBAIJ,
1821   MatNorm_MPIBAIJ,
1822   MatAssemblyBegin_MPIBAIJ,
1823   MatAssemblyEnd_MPIBAIJ,
1824   0,
1825   MatSetOption_MPIBAIJ,
1826   MatZeroEntries_MPIBAIJ,
1827   MatZeroRows_MPIBAIJ,
1828   0,
1829   0,
1830   0,
1831   0,
1832   MatGetSize_MPIBAIJ,
1833   MatGetLocalSize_MPIBAIJ,
1834   MatGetOwnershipRange_MPIBAIJ,
1835   0,
1836   0,
1837   0,
1838   0,
1839   MatDuplicate_MPIBAIJ,
1840   0,
1841   0,
1842   0,
1843   0,
1844   0,
1845   MatGetSubMatrices_MPIBAIJ,
1846   MatIncreaseOverlap_MPIBAIJ,
1847   MatGetValues_MPIBAIJ,
1848   0,
1849   MatPrintHelp_MPIBAIJ,
1850   MatScale_MPIBAIJ,
1851   0,
1852   0,
1853   0,
1854   MatGetBlockSize_MPIBAIJ,
1855   0,
1856   0,
1857   0,
1858   0,
1859   0,
1860   0,
1861   MatSetUnfactored_MPIBAIJ,
1862   0,
1863   MatSetValuesBlocked_MPIBAIJ,
1864   0,
1865   0,
1866   0,
1867   MatGetMaps_Petsc};
1868 
1869 
1870 EXTERN_C_BEGIN
1871 #undef __FUNC__
1872 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1873 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1874 {
1875   PetscFunctionBegin;
1876   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1877   *iscopy = PETSC_FALSE;
1878   PetscFunctionReturn(0);
1879 }
1880 EXTERN_C_END
1881 
1882 #undef __FUNC__
1883 #define __FUNC__ "MatCreateMPIBAIJ"
1884 /*@C
1885    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1886    (block compressed row).  For good matrix assembly performance
1887    the user should preallocate the matrix storage by setting the parameters
1888    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1889    performance can be increased by more than a factor of 50.
1890 
1891    Collective on MPI_Comm
1892 
1893    Input Parameters:
1894 +  comm - MPI communicator
1895 .  bs   - size of block
1896 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1897            This value should be the same as the local size used in creating the
1898            y vector for the matrix-vector product y = Ax.
1899 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1900            This value should be the same as the local size used in creating the
1901            x vector for the matrix-vector product y = Ax.
1902 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1903 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1904 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1905            submatrix  (same for all local rows)
1906 .  d_nnz - array containing the number of block nonzeros in the various block rows
1907            in the diagonal portion of the local submatrix (possibly different for each
1908            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1909 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1910            submatrix (same for all local rows).
1911 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1912            off-diagonal portion of the local submatrix (possibly different for
1913            each block row) or PETSC_NULL.
1914 
1915    Output Parameter:
1916 .  A - the matrix
1917 
1918    Options Database Keys:
1919 .   -mat_no_unroll - uses code that does not unroll the loops in the
1920                      block calculations (much slower)
1921 .   -mat_block_size - size of the blocks to use
1922 .   -mat_mpi - use the parallel matrix data structures even on one processor
1923                (defaults to using SeqBAIJ format on one processor)
1924 
1925    Notes:
1926    The user MUST specify either the local or global matrix dimensions
1927    (possibly both).
1928 
1929    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1930    then it must be used on all processors that share the object for that argument.
1931 
1932    Storage Information:
1933    For a square global matrix we define each processor's diagonal portion
1934    to be its local rows and the corresponding columns (a square submatrix);
1935    each processor's off-diagonal portion encompasses the remainder of the
1936    local matrix (a rectangular submatrix).
1937 
1938    The user can specify preallocated storage for the diagonal part of
1939    the local submatrix with either d_nz or d_nnz (not both).  Set
1940    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1941    memory allocation.  Likewise, specify preallocated storage for the
1942    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1943 
1944    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1945    the figure below we depict these three local rows and all columns (0-11).
1946 
1947 .vb
1948            0 1 2 3 4 5 6 7 8 9 10 11
1949           -------------------
1950    row 3  |  o o o d d d o o o o o o
1951    row 4  |  o o o d d d o o o o o o
1952    row 5  |  o o o d d d o o o o o o
1953           -------------------
1954 .ve
1955 
1956    Thus, any entries in the d locations are stored in the d (diagonal)
1957    submatrix, and any entries in the o locations are stored in the
1958    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1959    stored simply in the MATSEQBAIJ format for compressed row storage.
1960 
1961    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1962    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1963    In general, for PDE problems in which most nonzeros are near the diagonal,
1964    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1965    or you will get TERRIBLE performance; see the users' manual chapter on
1966    matrices.
1967 
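   Sample usage (a minimal sketch; the block size, local sizes, and
   preallocation figures below are illustrative only):

.vb
   Mat A;
   int ierr, bs = 2, m = 6;    /* three local block rows of size 2 */
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                           3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
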
1968    Level: intermediate
1969 
1970 .keywords: matrix, block, aij, compressed row, sparse, parallel
1971 
1972 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1973 @*/
1974 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1975                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1976 {
1977   Mat          B;
1978   Mat_MPIBAIJ  *b;
1979   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1980   int          flag1 = 0,flag2 = 0;
1981 
1982   PetscFunctionBegin;
1983   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1984 
1985   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1986   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
1987   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
1988   if (d_nnz) {
1989     for (i=0; i<m/bs; i++) {
1990       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1991     }
1992   }
1993   if (o_nnz) {
1994     for (i=0; i<m/bs; i++) {
1995       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1996     }
1997   }
1998 
1999   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2000   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2001   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2002   if (!flag1 && !flag2 && size == 1) {
2003     if (M == PETSC_DECIDE) M = m;
2004     if (N == PETSC_DECIDE) N = n;
2005     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
2006     PetscFunctionReturn(0);
2007   }
2008 
2009   *A = 0;
2010   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
2011   PLogObjectCreate(B);
2012   B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2013   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2014   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2015 
2016   B->ops->destroy    = MatDestroy_MPIBAIJ;
2017   B->ops->view       = MatView_MPIBAIJ;
2018   B->mapping    = 0;
2019   B->factor     = 0;
2020   B->assembled  = PETSC_FALSE;
2021 
2022   B->insertmode = NOT_SET_VALUES;
2023   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2024   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2025 
2026   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
2027     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2028   }
2029   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
2030     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2031   }
2032   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2033     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2034   }
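  /* if both the local and global sizes are given, the local sizes take
     precedence: reset the global sizes so they are recomputed by the
     summation below */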
2035   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2036   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2037 
2038   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2039     work[0] = m; work[1] = n;
2040     mbs = m/bs; nbs = n/bs;
2041     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
2042     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2043     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2044   }
2045   if (m == PETSC_DECIDE) {
2046     Mbs = M/bs;
2047     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2048     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2049     m   = mbs*bs;
2050   }
2051   if (n == PETSC_DECIDE) {
2052     Nbs = N/bs;
2053     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
2054     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2055     n   = nbs*bs;
2056   }
2057   if (mbs*bs != m || nbs*bs != n) {
2058     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2059   }
2060 
2061   b->m = m; B->m = m;
2062   b->n = n; B->n = n;
2063   b->N = N; B->N = N;
2064   b->M = M; B->M = M;
2065   b->bs  = bs;
2066   b->bs2 = bs*bs;
2067   b->mbs = mbs;
2068   b->nbs = nbs;
2069   b->Mbs = Mbs;
2070   b->Nbs = Nbs;
2071 
2072   /* the information in the maps duplicates the information computed below, eventually
2073      we should remove the duplicate information that is not contained in the maps */
2074   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2075   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2076 
2077   /* build local table of row and column ownerships */
2078   b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2079   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2080   b->cowners    = b->rowners + b->size + 2;
2081   b->rowners_bs = b->cowners + b->size + 2;
2082   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2083   b->rowners[0]    = 0;
2084   for ( i=2; i<=b->size; i++ ) {
2085     b->rowners[i] += b->rowners[i-1];
2086   }
2087   for ( i=0; i<=b->size; i++ ) {
2088     b->rowners_bs[i] = b->rowners[i]*bs;
2089   }
2090   b->rstart    = b->rowners[b->rank];
2091   b->rend      = b->rowners[b->rank+1];
2092   b->rstart_bs = b->rstart * bs;
2093   b->rend_bs   = b->rend * bs;
2094 
2095   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2096   b->cowners[0] = 0;
2097   for ( i=2; i<=b->size; i++ ) {
2098     b->cowners[i] += b->cowners[i-1];
2099   }
2100   b->cstart    = b->cowners[b->rank];
2101   b->cend      = b->cowners[b->rank+1];
2102   b->cstart_bs = b->cstart * bs;
2103   b->cend_bs   = b->cend * bs;
2104 
2105 
2106   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2107   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2108   PLogObjectParent(B,b->A);
2109   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2110   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2111   PLogObjectParent(B,b->B);
2112 
2113   /* build cache (stash) for matrix entries destined for other processors */
2114   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
2115   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
2116   b->donotstash  = 0;
2117   b->colmap      = 0;
2118   b->garray      = 0;
2119   b->roworiented = 1;
2120 
2121   /* stuff used in block assembly */
2122   b->barray       = 0;
2123 
2124   /* stuff used for matrix vector multiply */
2125   b->lvec         = 0;
2126   b->Mvctx        = 0;
2127 
2128   /* stuff for MatGetRow() */
2129   b->rowindices   = 0;
2130   b->rowvalues    = 0;
2131   b->getrowactive = PETSC_FALSE;
2132 
2133   /* hash table stuff */
2134   b->ht           = 0;
2135   b->hd           = 0;
2136   b->ht_size      = 0;
2137   b->ht_flag      = 0;
2138   b->ht_fact      = 0;
2139   b->ht_total_ct  = 0;
2140   b->ht_insert_ct = 0;
2141 
2142   *A = B;
2143   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2144   if (flg) {
2145     double fact = 1.39;
2146     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2147     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
2148     if (fact <= 1.0) fact = 1.39;
2149     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2150     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2151   }
2152   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
2153                                      "MatStoreValues_MPIBAIJ",
2154                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2155   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
2156                                      "MatRetrieveValues_MPIBAIJ",
2157                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2158   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
2159                                      "MatGetDiagonalBlock_MPIBAIJ",
2160                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2161   PetscFunctionReturn(0);
2162 }
2163 
2164 #undef __FUNC__
2165 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2166 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2167 {
2168   Mat         mat;
2169   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2170   int         ierr, len=0, flg;
2171 
2172   PetscFunctionBegin;
2173   *newmat       = 0;
2174   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2175   PLogObjectCreate(mat);
2176   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2177   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2178   mat->ops->destroy = MatDestroy_MPIBAIJ;
2179   mat->ops->view    = MatView_MPIBAIJ;
2180   mat->factor       = matin->factor;
2181   mat->assembled    = PETSC_TRUE;
2182   mat->insertmode   = NOT_SET_VALUES;
2183 
2184   a->m = mat->m   = oldmat->m;
2185   a->n = mat->n   = oldmat->n;
2186   a->M = mat->M   = oldmat->M;
2187   a->N = mat->N   = oldmat->N;
2188 
2189   a->bs  = oldmat->bs;
2190   a->bs2 = oldmat->bs2;
2191   a->mbs = oldmat->mbs;
2192   a->nbs = oldmat->nbs;
2193   a->Mbs = oldmat->Mbs;
2194   a->Nbs = oldmat->Nbs;
2195 
2196   a->rstart       = oldmat->rstart;
2197   a->rend         = oldmat->rend;
2198   a->cstart       = oldmat->cstart;
2199   a->cend         = oldmat->cend;
2200   a->size         = oldmat->size;
2201   a->rank         = oldmat->rank;
2202   a->donotstash   = oldmat->donotstash;
2203   a->roworiented  = oldmat->roworiented;
2204   a->rowindices   = 0;
2205   a->rowvalues    = 0;
2206   a->getrowactive = PETSC_FALSE;
2207   a->barray       = 0;
2208   a->rstart_bs    = oldmat->rstart_bs;
2209   a->rend_bs      = oldmat->rend_bs;
2210   a->cstart_bs    = oldmat->cstart_bs;
2211   a->cend_bs      = oldmat->cend_bs;
2212 
2213   /* hash table stuff */
2214   a->ht           = 0;
2215   a->hd           = 0;
2216   a->ht_size      = 0;
2217   a->ht_flag      = oldmat->ht_flag;
2218   a->ht_fact      = oldmat->ht_fact;
2219   a->ht_total_ct  = 0;
2220   a->ht_insert_ct = 0;
2221 
2222 
2223   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2224   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2225   a->cowners    = a->rowners + a->size + 2;
2226   a->rowners_bs = a->cowners + a->size + 2;
2227   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2228   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2229   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2230   if (oldmat->colmap) {
2231 #if defined (PETSC_USE_CTABLE)
2232   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2233 #else
2234     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2235     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2236     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2237 #endif
2238   } else a->colmap = 0;
2239   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2240     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2241     PLogObjectMemory(mat,len*sizeof(int));
2242     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2243   } else a->garray = 0;
2244 
2245   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2246   PLogObjectParent(mat,a->lvec);
2247   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2248 
2249   PLogObjectParent(mat,a->Mvctx);
2250   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2251   PLogObjectParent(mat,a->A);
2252   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2253   PLogObjectParent(mat,a->B);
2254   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2255   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2256   if (flg) {
2257     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2258   }
2259   *newmat = mat;
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #include "sys.h"
2264 
2265 #undef __FUNC__
2266 #define __FUNC__ "MatLoad_MPIBAIJ"
2267 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2268 {
2269   Mat          A;
2270   int          i, nz, ierr, j,rstart, rend, fd;
2271   Scalar       *vals,*buf;
2272   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2273   MPI_Status   status;
2274   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2275   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2276   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2277   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2278   int          dcount,kmax,k,nzcount,tmp;
2279 
2280   PetscFunctionBegin;
2281   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2282 
2283   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2284   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2285   if (!rank) {
2286     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2287     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2288     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2289     if (header[3] < 0) {
2290       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2291     }
2292   }
2293 
2294   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2295   M = header[1]; N = header[2];
2296 
2297   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2298 
2299   /*
2300      This code adds extra rows to make sure the number of rows is
2301      divisible by the blocksize
2302   */
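  /* e.g. M=10 with bs=4 gives Mbs=3 and extra_rows=2; the padded rows
     receive a single unit diagonal entry below */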
2303   Mbs        = M/bs;
2304   extra_rows = bs - M + bs*(Mbs);
2305   if (extra_rows == bs) extra_rows = 0;
2306   else                  Mbs++;
2307   if (extra_rows && !rank) {
2308     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2309   }
2310 
2311   /* determine ownership of all rows */
2312   mbs = Mbs/size + ((Mbs % size) > rank);
2313   m   = mbs * bs;
2314   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2315   browners = rowners + size + 1;
2316   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2317   rowners[0] = 0;
2318   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2319   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2320   rstart = rowners[rank];
2321   rend   = rowners[rank+1];
2322 
2323   /* distribute row lengths to all processors */
2324   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2325   if (!rank) {
2326     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2327     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2328     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2329     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2330     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2331     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2332     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2333   } else {
2334     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2335   }
2336 
2337   if (!rank) {
2338     /* calculate the number of nonzeros on each processor */
2339     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2340     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2341     for ( i=0; i<size; i++ ) {
2342       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2343         procsnz[i] += rowlengths[j];
2344       }
2345     }
2346     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2347 
2348     /* determine max buffer needed and allocate it */
2349     maxnz = 0;
2350     for ( i=0; i<size; i++ ) {
2351       maxnz = PetscMax(maxnz,procsnz[i]);
2352     }
2353     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2354 
2355     /* read in my part of the matrix column indices  */
2356     nz = procsnz[0];
2357     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2358     mycols = ibuf;
2359     if (size == 1)  nz -= extra_rows;
2360     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2361     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2362 
2363     /* read in the parts for all other processors (except the last) and ship them off */
2364     for ( i=1; i<size-1; i++ ) {
2365       nz   = procsnz[i];
2366       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2367       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2368     }
2369     /* read in the stuff for the last proc */
2370     if ( size != 1 ) {
2371       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2372       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2373       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2374       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2375     }
2376     ierr = PetscFree(cols);CHKERRQ(ierr);
2377   } else {
2378     /* determine buffer space needed for message */
2379     nz = 0;
2380     for ( i=0; i<m; i++ ) {
2381       nz += locrowlens[i];
2382     }
2383     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2384     mycols = ibuf;
2385     /* receive message of column indices*/
2386     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2387     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2388     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2389   }
2390 
2391   /* loop over local rows, determining number of off diagonal entries */
2392   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2393   odlens = dlens + (rend-rstart);
2394   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2395   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2396   masked1 = mask    + Mbs;
2397   masked2 = masked1 + Mbs;
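  /* mask[] flags the block columns already seen in the current block row;
     masked1[]/masked2[] record which diagonal/off-diagonal flags were set so
     that only those entries need to be cleared before the next block row */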
2398   rowcount = 0; nzcount = 0;
2399   for ( i=0; i<mbs; i++ ) {
2400     dcount  = 0;
2401     odcount = 0;
2402     for ( j=0; j<bs; j++ ) {
2403       kmax = locrowlens[rowcount];
2404       for ( k=0; k<kmax; k++ ) {
2405         tmp = mycols[nzcount++]/bs;
2406         if (!mask[tmp]) {
2407           mask[tmp] = 1;
2408           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2409           else masked1[dcount++] = tmp;
2410         }
2411       }
2412       rowcount++;
2413     }
2414 
2415     dlens[i]  = dcount;
2416     odlens[i] = odcount;
2417 
2418     /* zero out the mask elements we set */
2419     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2420     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2421   }
2422 
2423   /* create our matrix */
2424   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2425   A = *newmat;
2426   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2427 
2428   if (!rank) {
2429     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2430     /* read in my part of the matrix numerical values  */
2431     nz = procsnz[0];
2432     vals = buf;
2433     mycols = ibuf;
2434     if (size == 1)  nz -= extra_rows;
2435     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2436     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2437 
2438     /* insert into matrix */
2439     jj      = rstart*bs;
2440     for ( i=0; i<m; i++ ) {
2441       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2442       mycols += locrowlens[i];
2443       vals   += locrowlens[i];
2444       jj++;
2445     }
2446     /* read in other processors (except the last one) and ship out */
2447     for ( i=1; i<size-1; i++ ) {
2448       nz   = procsnz[i];
2449       vals = buf;
2450       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2451       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2452     }
2453     /* the last proc */
2454     if ( size != 1 ){
2455       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 here; the extra rows are not on disk */
2456       vals = buf;
2457       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2458       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2459       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2460     }
2461     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2462   } else {
2463     /* receive numeric values */
2464     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2465 
2466     /* receive message of values*/
2467     vals   = buf;
2468     mycols = ibuf;
2469     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2470     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2471     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2472 
2473     /* insert into matrix */
2474     jj      = rstart*bs;
2475     for ( i=0; i<m; i++ ) {
2476       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2477       mycols += locrowlens[i];
2478       vals   += locrowlens[i];
2479       jj++;
2480     }
2481   }
2482   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2483   ierr = PetscFree(buf);CHKERRQ(ierr);
2484   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2485   ierr = PetscFree(rowners);CHKERRQ(ierr);
2486   ierr = PetscFree(dlens);CHKERRQ(ierr);
2487   ierr = PetscFree(mask);CHKERRQ(ierr);
2488   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2489   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 #undef __FUNC__
2494 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2495 /*@
2496    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2497 
2498    Collective on Mat
2499 
2500    Input Parameters:
2501 +  mat  - the matrix
2502 -  fact - factor used to size the hash table
2503 
2504    Level: advanced
2505 
2506    Notes:
2507    This can also be set by the command line option: -mat_use_hash_table <fact>
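
   Sample usage (a sketch; the factor value is illustrative):
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve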
2508 
2509 .keywords: matrix, hashtable, factor, HT
2510 
2511 .seealso: MatSetOption()
2512 @*/
2513 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2514 {
2515   Mat_MPIBAIJ *baij;
2516 
2517   PetscFunctionBegin;
2518   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2519   if (mat->type != MATMPIBAIJ) {
2520     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2521   }
2522   baij = (Mat_MPIBAIJ*) mat->data;
2523   baij->ht_fact = fact;
2524   PetscFunctionReturn(0);
2525 }
2526