xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 454a90a3eb7bca6958262e5eca1eb393ad97e108)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.178 1999/09/21 14:41:06 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
48 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
55 #undef __FUNC__
56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
57 static int CreateColmap_MPIBAIJ_Private(Mat mat)
58 {
59   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
60   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
61   int         nbs = B->nbs,i,bs=B->bs,ierr;
62 
63   PetscFunctionBegin;
64 #if defined (PETSC_USE_CTABLE)
65   ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
66   for ( i=0; i<nbs; i++ ){
67     ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
68   }
69 #else
70   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
71   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
72   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
73   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
74 #endif
75   PetscFunctionReturn(0);
76 }
77 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private - In-line insertion of a single scalar
   (row,col are element indices, not block indices) into the diagonal
   block a.  Expects the caller (MatSetValues_MPIBAIJ) to have the macro's
   working variables (rp,ap,bap,brow,bcol,ridx,cidx,low,high,t,_i,ii,N,
   nrow,rmax and the cached aj/ai/ailen/aimax/aa arrays) in scope.
   Searches the row (binary search narrowed to a short linear scan) for
   an existing block; if absent and a->nonew permits, shifts later
   entries up and, when the row is full, reallocates the whole matrix
   storage grown by CHUNKSIZE block entries.  NOTE(review): a->nz is
   only incremented on the reallocation path — confirm against the
   SeqBAIJ bookkeeping before relying on a->nz here.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                          len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
157 
/*
   MatSetValues_SeqBAIJ_B_Private - Identical in structure to
   MatSetValues_SeqBAIJ_A_Private but operates on the off-diagonal block
   b (arrays bj/bi/bilen/bimax/ba, label b_noinsert).  Kept as a separate
   macro because the two blocks use distinct cached pointer sets in
   MatSetValues_MPIBAIJ; any change here must be mirrored in the A
   variant.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
235 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts/adds individual (element-indexed)
   values.  Locally owned rows are written immediately: columns inside
   [cstart_bs,cend_bs) go to the diagonal block via
   MatSetValues_SeqBAIJ_A_Private(), the rest go to the off-diagonal
   block via MatSetValues_SeqBAIJ_B_Private() after mapping the global
   column through colmap.  Rows owned by other processes are placed in
   mat->stash for communication during assembly.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;  /* negative row indices are silently skipped */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* this row is owned locally */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* off-diagonal block: translate global column to B's local numbering */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not previously present: revert to unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned elsewhere: stash for exchange during MatAssembly */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
318 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ - Inserts/adds bs-by-bs blocks of values;
   im[]/in[] are block indices.  Each block is gathered (if necessary)
   into the column-major work buffer baij->barray before being handed to
   MatSetValuesBlocked_SeqBAIJ() on either the diagonal (A) or the
   off-diagonal (B) local piece.  Non-local block rows go to mat->bstash.
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized work buffer used to repack one block */
  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  /* stride between consecutive rows (resp. columns) of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++  = *value++;
            }
          }
          /* rewind so barray again points at the start of the work buffer */
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal piece */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal piece: translate via colmap (built on first use) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
            ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
            if((data - 1) % bs)
	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
#endif
#endif
#if defined (PETSC_USE_CTABLE)
	    ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new block column: revert the matrix to unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
/* Multiplicative hashing: HASH_KEY is the fractional part of the golden
   ratio; HASH evaluates to an index in [0,size).  'tmp' must be a double
   lvalue supplied by the caller (scratch storage for the product). */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues: the
   (block row, block col) of each local entry must already exist in the
   table built by MatCreateHashTable_MPIBAIJ_Private(); new nonzeros
   cannot be created here.  Non-local rows are stashed as usual.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table; keys are offset by +1 so 0 means empty */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* debug build: also count probes for the assembly-time statistics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned elsewhere: stash for exchange during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
499 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of
   MatSetValuesBlocked; im[]/in[] are block indices and every referenced
   block must already have an entry in the hash table.  Values are
   written column-major into the bs2 block found via the table; non-local
   block rows go to mat->bstash.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride between consecutive rows (resp. columns) of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table; keys are offset by +1 so 0 means empty */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            /* transpose the row-oriented input into the column-major block */
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned by another process: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
613 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves individual (element-indexed) values.
   Only locally owned rows may be queried; requesting a remote row is an
   error.  Columns in the diagonal range are read from baij->A; other
   columns are translated through colmap and read from baij->B, with
   0.0 returned for entries whose block is not stored locally.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: need colmap to translate the column */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block not present locally (or colmap slot reused): value is 0 */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
657 
658 #undef __FUNC__
659 #define __FUNC__ "MatNorm_MPIBAIJ"
660 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
661 {
662   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
663   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
664   int        ierr, i,bs2=baij->bs2;
665   double     sum = 0.0;
666   Scalar     *v;
667 
668   PetscFunctionBegin;
669   if (baij->size == 1) {
670     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
671   } else {
672     if (type == NORM_FROBENIUS) {
673       v = amat->a;
674       for (i=0; i<amat->nz*bs2; i++ ) {
675 #if defined(PETSC_USE_COMPLEX)
676         sum += PetscReal(PetscConj(*v)*(*v)); v++;
677 #else
678         sum += (*v)*(*v); v++;
679 #endif
680       }
681       v = bmat->a;
682       for (i=0; i<bmat->nz*bs2; i++ ) {
683 #if defined(PETSC_USE_COMPLEX)
684         sum += PetscReal(PetscConj(*v)*(*v)); v++;
685 #else
686         sum += (*v)*(*v); v++;
687 #endif
688       }
689       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
690       *norm = sqrt(*norm);
691     } else {
692       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
693     }
694   }
695   PetscFunctionReturn(0);
696 }
697 
698 
699 /*
700   Creates the hash table, and sets the table
701   This table is created only once.
702   If new entried need to be added to the matrix
703   then the hash table has to be destroyed and
704   recreated.
705 */
706 #undef __FUNC__
707 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
708 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
709 {
710   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
711   Mat         A = baij->A, B=baij->B;
712   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
713   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
714   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
715   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
716   int         *HT,key;
717   Scalar      **HD;
718   double      tmp;
719 #if defined(PETSC_USE_BOPT_g)
720   int         ct=0,max=0;
721 #endif
722 
723   PetscFunctionBegin;
724   baij->ht_size=(int)(factor*nz);
725   size = baij->ht_size;
726 
727   if (baij->ht) {
728     PetscFunctionReturn(0);
729   }
730 
731   /* Allocate Memory for Hash Table */
732   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
733   baij->ht = (int*)(baij->hd + size);
734   HD = baij->hd;
735   HT = baij->ht;
736 
737 
738   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
739 
740 
741   /* Loop Over A */
742   for ( i=0; i<a->mbs; i++ ) {
743     for ( j=ai[i]; j<ai[i+1]; j++ ) {
744       row = i+rstart;
745       col = aj[j]+cstart;
746 
747       key = row*Nbs + col + 1;
748       h1  = HASH(size,key,tmp);
749       for ( k=0; k<size; k++ ){
750         if (HT[(h1+k)%size] == 0.0) {
751           HT[(h1+k)%size] = key;
752           HD[(h1+k)%size] = a->a + j*bs2;
753           break;
754 #if defined(PETSC_USE_BOPT_g)
755         } else {
756           ct++;
757 #endif
758         }
759       }
760 #if defined(PETSC_USE_BOPT_g)
761       if (k> max) max = k;
762 #endif
763     }
764   }
765   /* Loop Over B */
766   for ( i=0; i<b->mbs; i++ ) {
767     for ( j=bi[i]; j<bi[i+1]; j++ ) {
768       row = i+rstart;
769       col = garray[bj[j]];
770       key = row*Nbs + col + 1;
771       h1  = HASH(size,key,tmp);
772       for ( k=0; k<size; k++ ){
773         if (HT[(h1+k)%size] == 0.0) {
774           HT[(h1+k)%size] = key;
775           HD[(h1+k)%size] = b->a + j*bs2;
776           break;
777 #if defined(PETSC_USE_BOPT_g)
778         } else {
779           ct++;
780 #endif
781         }
782       }
783 #if defined(PETSC_USE_BOPT_g)
784       if (k> max) max = k;
785 #endif
786     }
787   }
788 
789   /* Print Summary */
790 #if defined(PETSC_USE_BOPT_g)
791   for ( i=0,j=0; i<size; i++)
792     if (HT[i]) {j++;}
793   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
794            (j== 0)? 0.0:((double)(ct+j))/j,max);
795 #endif
796   PetscFunctionReturn(0);
797 }
798 
799 #undef __FUNC__
800 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
801 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
802 {
803   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
804   int         ierr,nstash,reallocs;
805   InsertMode  addv;
806 
807   PetscFunctionBegin;
808   if (baij->donotstash) {
809     PetscFunctionReturn(0);
810   }
811 
812   /* make sure all processors are either in INSERTMODE or ADDMODE */
813   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
814   if (addv == (ADD_VALUES|INSERT_VALUES)) {
815     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
816   }
817   mat->insertmode = addv; /* in case this processor had no cache */
818 
819   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
820   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
821   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
822   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
823   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
824   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
825   PetscFunctionReturn(0);
826 }
827 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes parallel assembly: drains both stashes
   of off-processor values received from other processors, assembles the
   local diagonal (A) and off-diagonal (B) blocks, and optionally builds the
   hash table used by the fast MatSetValues path.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: one message per iteration until flg == 0 */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* block values: each entry occupies bs2 = bs*bs scalars in val */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    /* restore the orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: set up the scatter for ghost values */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset hash-table search statistics (debug builds only) */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* user requested the hash table (ht_flag) but it has not been built yet:
     build it now and switch to the hash-table MatSetValues implementations */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* free the MatGetRow work space; it is reallocated on demand */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
926 
927 /*
928 #undef __FUNC__
929 #define __FUNC__ "MatView_MPIBAIJ_Binary"
930 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
931 {
932   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
933   int          ierr;
934 
935   PetscFunctionBegin;
936   if (baij->size == 1) {
937     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
938   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
939   PetscFunctionReturn(0);
940 }
941 */
942 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views the parallel matrix.  For the
   ASCII info formats it prints per-processor statistics; otherwise it
   assembles the entire matrix onto processor 0 and views that copy.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE         *fd;

  PetscFunctionBegin;
  if (PetscTypeCompare(viewer,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* print one processor at a time, in rank order */
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(viewer,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    /* a null draw context means nothing should be drawn */
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* processor 0 owns all M x N rows of the gathered copy; the rest own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs unblocked row indices of one block row */
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        /* A stores local block-column indices; shift by cstart to global */
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          /* one column of the bs x bs block at a time; a advances through
             the column-oriented block storage */
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        /* B stores compressed column indices; garray maps back to global */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(viewer,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1045 
1046 
1047 
1048 #undef __FUNC__
1049 #define __FUNC__ "MatView_MPIBAIJ"
1050 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1051 {
1052   int         ierr;
1053 
1054   PetscFunctionBegin;
1055   if (PetscTypeCompare(viewer,ASCII_VIEWER) || PetscTypeCompare(viewer,DRAW_VIEWER) ||
1056       PetscTypeCompare(viewer,SOCKET_VIEWER)|| PetscTypeCompare(viewer,BINARY_VIEWER)) {
1057     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1058     /*
1059   } else if (PetscTypeCompare(viewer,BINARY_VIEWER)) {
1060     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1061     */
1062   } else {
1063     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage associated with the parallel
   matrix: mappings, maps, stashes, the sequential A and B blocks, the
   column map, the ghost-value vector/scatter, and the data structure itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is a Table or a plain array depending on the build configuration */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd is the hash-table data array (only allocated when the hash table is in use) */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1115 
1116 #undef __FUNC__
1117 #define __FUNC__ "MatMult_MPIBAIJ"
1118 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1119 {
1120   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1121   int         ierr, nt;
1122 
1123   PetscFunctionBegin;
1124   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1125   if (nt != a->n) {
1126     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1127   }
1128   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1129   if (nt != a->m) {
1130     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1131   }
1132   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1133   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1134   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1135   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1136   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNC__
1141 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1142 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1143 {
1144   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1145   int        ierr;
1146 
1147   PetscFunctionBegin;
1148   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1149   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1150   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1151   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNC__
1156 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1157 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1158 {
1159   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1160   int         ierr;
1161 
1162   PetscFunctionBegin;
1163   /* do nondiagonal part */
1164   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1165   /* send it on its way */
1166   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1167   /* do local part */
1168   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1169   /* receive remote parts: note this assumes the values are not actually */
1170   /* inserted in yy until the next line, which is true for my implementation*/
1171   /* but is not perhaps always true. */
1172   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNC__
1177 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1178 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1179 {
1180   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1181   int         ierr;
1182 
1183   PetscFunctionBegin;
1184   /* do nondiagonal part */
1185   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1186   /* send it on its way */
1187   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1188   /* do local part */
1189   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1190   /* receive remote parts: note this assumes the values are not actually */
1191   /* inserted in yy until the next line, which is true for my implementation*/
1192   /* but is not perhaps always true. */
1193   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 /*
1198   This only works correctly for square matrices where the subblock A->A is the
1199    diagonal block
1200 */
1201 #undef __FUNC__
1202 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1203 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1204 {
1205   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1206   int         ierr;
1207 
1208   PetscFunctionBegin;
1209   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1210   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNC__
1215 #define __FUNC__ "MatScale_MPIBAIJ"
1216 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1217 {
1218   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1219   int         ierr;
1220 
1221   PetscFunctionBegin;
1222   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1223   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNC__
1228 #define __FUNC__ "MatGetSize_MPIBAIJ"
1229 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1230 {
1231   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1232 
1233   PetscFunctionBegin;
1234   if (m) *m = mat->M;
1235   if (n) *n = mat->N;
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNC__
1240 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1241 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1242 {
1243   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1244 
1245   PetscFunctionBegin;
1246   *m = mat->m; *n = mat->n;
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNC__
1251 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1252 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1253 {
1254   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1255 
1256   PetscFunctionBegin;
1257   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1258   PetscFunctionReturn(0);
1259 }
1260 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns (in global numbering, sorted by column) the
   nonzeros of one locally-owned unblocked row, merging the row pieces from
   the diagonal block A and the off-diagonal block B.  Must be paired with
   MatRestoreRow_MPIBAIJ; only one row may be active at a time.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* combined block count of row i across A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array behind them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* only request from A/B the pieces (values and/or indices) the caller wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the A block */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining B entries (right of the diagonal) */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined above while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: find imark here instead */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A columns are local; shift by cstart*bs to global numbering */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* give A and B back their row work space */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1341 
1342 #undef __FUNC__
1343 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1344 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1345 {
1346   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1347 
1348   PetscFunctionBegin;
1349   if (baij->getrowactive == PETSC_FALSE) {
1350     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1351   }
1352   baij->getrowactive = PETSC_FALSE;
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNC__
1357 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1358 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1359 {
1360   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1361 
1362   PetscFunctionBegin;
1363   *bs = baij->bs;
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNC__
1368 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1369 int MatZeroEntries_MPIBAIJ(Mat A)
1370 {
1371   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1372   int         ierr;
1373 
1374   PetscFunctionBegin;
1375   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1376   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNC__
1381 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1382 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1383 {
1384   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1385   Mat         A = a->A, B = a->B;
1386   int         ierr;
1387   double      isend[5], irecv[5];
1388 
1389   PetscFunctionBegin;
1390   info->block_size     = (double)a->bs;
1391   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1392   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1393   isend[3] = info->memory;  isend[4] = info->mallocs;
1394   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1395   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1396   isend[3] += info->memory;  isend[4] += info->mallocs;
1397   if (flag == MAT_LOCAL) {
1398     info->nz_used      = isend[0];
1399     info->nz_allocated = isend[1];
1400     info->nz_unneeded  = isend[2];
1401     info->memory       = isend[3];
1402     info->mallocs      = isend[4];
1403   } else if (flag == MAT_GLOBAL_MAX) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   } else if (flag == MAT_GLOBAL_SUM) {
1411     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1412     info->nz_used      = irecv[0];
1413     info->nz_allocated = irecv[1];
1414     info->nz_unneeded  = irecv[2];
1415     info->memory       = irecv[3];
1416     info->mallocs      = irecv[4];
1417   } else {
1418     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1419   }
1420   info->rows_global       = (double)a->M;
1421   info->columns_global    = (double)a->N;
1422   info->rows_local        = (double)a->m;
1423   info->columns_local     = (double)a->N;
1424   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1425   info->fill_ratio_needed = 0;
1426   info->factor_mallocs    = 0;
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNC__
1431 #define __FUNC__ "MatSetOption_MPIBAIJ"
1432 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1433 {
1434   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1435   int         ierr;
1436 
1437   PetscFunctionBegin;
1438   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1439       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1440       op == MAT_COLUMNS_UNSORTED ||
1441       op == MAT_COLUMNS_SORTED ||
1442       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1443       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1444         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1445         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1446   } else if (op == MAT_ROW_ORIENTED) {
1447         a->roworiented = 1;
1448         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1449         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1450   } else if (op == MAT_ROWS_SORTED ||
1451              op == MAT_ROWS_UNSORTED ||
1452              op == MAT_SYMMETRIC ||
1453              op == MAT_STRUCTURALLY_SYMMETRIC ||
1454              op == MAT_YES_NEW_DIAGONALS ||
1455              op == MAT_USE_HASH_TABLE) {
1456     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1457   } else if (op == MAT_COLUMN_ORIENTED) {
1458     a->roworiented = 0;
1459     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1460     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1461   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1462     a->donotstash = 1;
1463   } else if (op == MAT_NO_NEW_DIAGONALS) {
1464     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1465   } else if (op == MAT_USE_HASH_TABLE) {
1466     a->ht_flag = 1;
1467   } else {
1468     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNC__
1474 #define __FUNC__ "MatTranspose_MPIBAIJ("
1475 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1476 {
1477   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1478   Mat_SeqBAIJ *Aloc;
1479   Mat        B;
1480   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1481   int        bs=baij->bs,mbs=baij->mbs;
1482   Scalar     *a;
1483 
1484   PetscFunctionBegin;
1485   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1486   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1487 CHKERRQ(ierr);
1488 
1489   /* copy over the A part */
1490   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1491   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1492   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1493 
1494   for ( i=0; i<mbs; i++ ) {
1495     rvals[0] = bs*(baij->rstart + i);
1496     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1497     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1498       col = (baij->cstart+aj[j])*bs;
1499       for (k=0; k<bs; k++ ) {
1500         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1501         col++; a += bs;
1502       }
1503     }
1504   }
1505   /* copy over the B part */
1506   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1507   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1508   for ( i=0; i<mbs; i++ ) {
1509     rvals[0] = bs*(baij->rstart + i);
1510     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1511     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1512       col = baij->garray[aj[j]]*bs;
1513       for (k=0; k<bs; k++ ) {
1514         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1515         col++; a += bs;
1516       }
1517     }
1518   }
1519   ierr = PetscFree(rvals);CHKERRQ(ierr);
1520   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1521   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1522 
1523   if (matout != PETSC_NULL) {
1524     *matout = B;
1525   } else {
1526     PetscOps *Abops;
1527     MatOps   Aops;
1528 
1529     /* This isn't really an in-place transpose .... but free data structures from baij */
1530     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
1531     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1532     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1533 #if defined (PETSC_USE_CTABLE)
1534     if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
1535 #else
1536     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1537 #endif
1538     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1539     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1540     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1541     ierr = PetscFree(baij);CHKERRQ(ierr);
1542 
1543     /*
1544        This is horrible, horrible code. We need to keep the
1545       A pointers for the bops and ops but copy everything
1546       else from C.
1547     */
1548     Abops   = A->bops;
1549     Aops    = A->ops;
1550     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1551     A->bops = Abops;
1552     A->ops  = Aops;
1553 
1554     PetscHeaderDestroy(B);
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr).
   Either ll or rr may be null (no scaling on that side).  The scatter of
   rr's ghost values is overlapped with the left scaling and the scaling of
   the diagonal block.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         a = baij->A, b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  /* s2 = local rows, s3 = local columns */
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, so ll applies directly) */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1591 
1592 #undef __FUNC__
1593 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1594 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1595 {
1596   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1597   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1598   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1599   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1600   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1601   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1602   MPI_Comm       comm = A->comm;
1603   MPI_Request    *send_waits,*recv_waits;
1604   MPI_Status     recv_status,*send_status;
1605   IS             istmp;
1606 
1607   PetscFunctionBegin;
1608   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1609   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1610 
1611   /*  first count number of contributors to each processor */
1612   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1613   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1614  procs   = nprocs + size;
1615   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1616   for ( i=0; i<N; i++ ) {
1617     idx = rows[i];
1618     found = 0;
1619     for ( j=0; j<size; j++ ) {
1620       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1621         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1622       }
1623     }
1624     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1625   }
1626   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1627 
1628   /* inform other processors of number of messages and max length*/
1629   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
1630   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1631   nrecvs = work[rank];
1632   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1633   nmax   = work[rank];
1634   ierr = PetscFree(work);CHKERRQ(ierr);
1635 
1636   /* post receives:   */
1637   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1638   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1639   for ( i=0; i<nrecvs; i++ ) {
1640     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1641   }
1642 
1643   /* do sends:
1644      1) starts[i] gives the starting index in svalues for stuff going to
1645      the ith processor
1646   */
1647   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1648   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1649   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
1650   starts[0] = 0;
1651   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1652   for ( i=0; i<N; i++ ) {
1653     svalues[starts[owner[i]]++] = rows[i];
1654   }
1655   ISRestoreIndices(is,&rows);
1656 
1657   starts[0] = 0;
1658   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1659   count = 0;
1660   for ( i=0; i<size; i++ ) {
1661     if (procs[i]) {
1662       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1663     }
1664   }
1665   ierr = PetscFree(starts);CHKERRQ(ierr);
1666 
1667   base = owners[rank]*bs;
1668 
1669   /*  wait on receives */
1670   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
1671   source = lens + nrecvs;
1672   count  = nrecvs; slen = 0;
1673   while (count) {
1674     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1675     /* unpack receives into our local space */
1676     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1677     source[imdex]  = recv_status.MPI_SOURCE;
1678     lens[imdex]  = n;
1679     slen += n;
1680     count--;
1681   }
1682   ierr = PetscFree(recv_waits); CHKERRQ(ierr);
1683 
1684   /* move the data into the send scatter */
1685   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
1686   count = 0;
1687   for ( i=0; i<nrecvs; i++ ) {
1688     values = rvalues + i*nmax;
1689     for ( j=0; j<lens[i]; j++ ) {
1690       lrows[count++] = values[j] - base;
1691     }
1692   }
1693   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1694   ierr = PetscFree(lens);CHKERRQ(ierr);
1695   ierr = PetscFree(owner);CHKERRQ(ierr);
1696   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1697 
1698   /* actually zap the local rows */
1699   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1700   PLogObjectParent(A,istmp);
1701 
1702   /*
1703         Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
1705      code so that MatSetValues() is not called for each diagonal allocating
1706      new memory, thus calling lots of mallocs and slowing things down.
1707 
1708        Contributed by: Mathew Knepley
1709   */
1710   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1711   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
1712   if (diag && (l->A->M == l->A->N)) {
1713     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
1714   } else if (diag) {
1715     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1716     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1717       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1718 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1719     }
1720     for ( i = 0; i < slen; i++ ) {
1721       row  = lrows[i] + rstart_bs;
1722       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1723     }
1724     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1726   } else {
1727     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1728   }
1729 
1730   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1731   ierr = PetscFree(lrows);CHKERRQ(ierr);
1732 
1733   /* wait on sends */
1734   if (nsends) {
1735     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1736     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1737     ierr = PetscFree(send_status);CHKERRQ(ierr);
1738   }
1739   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1740   ierr = PetscFree(svalues);CHKERRQ(ierr);
1741 
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 #undef __FUNC__
1746 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1747 int MatPrintHelp_MPIBAIJ(Mat A)
1748 {
1749   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1750   MPI_Comm    comm = A->comm;
1751   static int  called = 0;
1752   int         ierr;
1753 
1754   PetscFunctionBegin;
1755   if (!a->rank) {
1756     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1757   }
1758   if (called) {PetscFunctionReturn(0);} else called = 1;
1759   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1760   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNC__
1765 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1766 int MatSetUnfactored_MPIBAIJ(Mat A)
1767 {
1768   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1769   int         ierr;
1770 
1771   PetscFunctionBegin;
1772   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1777 
1778 #undef __FUNC__
1779 #define __FUNC__ "MatEqual_MPIBAIJ"
1780 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1781 {
1782   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1783   Mat         a, b, c, d;
1784   PetscTruth  flg;
1785   int         ierr;
1786 
1787   PetscFunctionBegin;
1788   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1789   a = matA->A; b = matA->B;
1790   c = matB->A; d = matB->B;
1791 
1792   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1793   if (flg == PETSC_TRUE) {
1794     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1795   }
1796   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 /* -------------------------------------------------------------------*/
/*
   Function table ("virtual methods") for the MATMPIBAIJ matrix type.
   The entries must appear in exactly the order declared in
   struct _MatOps; a 0 marks an operation with no MPIBAIJ-specific
   implementation.  This table is copied into each new matrix header
   by MatCreateMPIBAIJ() and MatDuplicate_MPIBAIJ().
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1868 
1869 
1870 EXTERN_C_BEGIN
1871 #undef __FUNC__
1872 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1873 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1874 {
1875   PetscFunctionBegin;
1876   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1877   *iscopy = PETSC_FALSE;
1878   PetscFunctionReturn(0);
1879 }
1880 EXTERN_C_END
1881 
1882 #undef __FUNC__
1883 #define __FUNC__ "MatCreateMPIBAIJ"
1884 /*@C
1885    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1886    (block compressed row).  For good matrix assembly performance
1887    the user should preallocate the matrix storage by setting the parameters
1888    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1889    performance can be increased by more than a factor of 50.
1890 
1891    Collective on MPI_Comm
1892 
1893    Input Parameters:
1894 +  comm - MPI communicator
.  bs   - size of block
1896 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1897            This value should be the same as the local size used in creating the
1898            y vector for the matrix-vector product y = Ax.
1899 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1900            This value should be the same as the local size used in creating the
1901            x vector for the matrix-vector product y = Ax.
1902 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1903 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1904 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1905            submatrix  (same for all local rows)
1906 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
1908            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1909 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1910            submatrix (same for all local rows).
1911 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1912            off-diagonal portion of the local submatrix (possibly different for
1913            each block row) or PETSC_NULL.
1914 
1915    Output Parameter:
1916 .  A - the matrix
1917 
1918    Options Database Keys:
1919 .   -mat_no_unroll - uses code that does not unroll the loops in the
1920                      block calculations (much slower)
1921 .   -mat_block_size - size of the blocks to use
1922 .   -mat_mpi - use the parallel matrix data structures even on one processor
1923                (defaults to using SeqBAIJ format on one processor)
1924 
1925    Notes:
1926    The user MUST specify either the local or global matrix dimensions
1927    (possibly both).
1928 
1929    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1931 
1932    Storage Information:
1933    For a square global matrix we define each processor's diagonal portion
1934    to be its local rows and the corresponding columns (a square submatrix);
1935    each processor's off-diagonal portion encompasses the remainder of the
1936    local matrix (a rectangular submatrix).
1937 
1938    The user can specify preallocated storage for the diagonal part of
1939    the local submatrix with either d_nz or d_nnz (not both).  Set
1940    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1941    memory allocation.  Likewise, specify preallocated storage for the
1942    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1943 
1944    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1945    the figure below we depict these three local rows and all columns (0-11).
1946 
1947 .vb
1948            0 1 2 3 4 5 6 7 8 9 10 11
1949           -------------------
1950    row 3  |  o o o d d d o o o o o o
1951    row 4  |  o o o d d d o o o o o o
1952    row 5  |  o o o d d d o o o o o o
1953           -------------------
1954 .ve
1955 
1956    Thus, any entries in the d locations are stored in the d (diagonal)
1957    submatrix, and any entries in the o locations are stored in the
1958    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1959    stored simply in the MATSEQBAIJ format for compressed row storage.
1960 
1961    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1962    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1963    In general, for PDE problems in which most nonzeros are near the diagonal,
1964    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1965    or you will get TERRIBLE performance; see the users' manual chapter on
1966    matrices.
1967 
1968    Level: intermediate
1969 
1970 .keywords: matrix, block, aij, compressed row, sparse, parallel
1971 
1972 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1973 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  /* allow the command line to override the block size passed in */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* validate the block size and the preallocation arguments */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* on a single process use the sequential format unless the user explicitly
     asked for the parallel data structures; in the sequential case all
     nonzeros belong to the "diagonal" block so o_nz/o_nnz are not passed on */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* consistency checks: the caller must supply enough size information */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both a local and a global size are given, the local size wins
     and the global size is recomputed from the local sizes below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* global sizes: sum the local sizes over all processes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* local sizes: split the global block count as evenly as possible,
     giving the first (Mbs % size) processes one extra block row */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* cache the sizes both in point (scalar) units and in block units */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* rowners[i] = first block row owned by process i (prefix sum of the
     gathered local block counts); rowners_bs is the same in point units */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  /* same construction for the column ownership */
  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* b->A stores the local m x n "diagonal" block; b->B is created here
     with the full global column count N for the off-diagonal part */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optionally enable hash-table based assembly; the option value (when
     given and > 1.0) is used as the hash table factor */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the format-specific operations that are looked up by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2163 
2164 #undef __FUNC__
2165 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2166 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2167 {
2168   Mat         mat;
2169   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2170   int         ierr, len=0, flg;
2171 
2172   PetscFunctionBegin;
2173   *newmat       = 0;
2174   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2175   PLogObjectCreate(mat);
2176   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2177   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2178   mat->ops->destroy = MatDestroy_MPIBAIJ;
2179   mat->ops->view    = MatView_MPIBAIJ;
2180   mat->factor       = matin->factor;
2181   mat->assembled    = PETSC_TRUE;
2182   mat->insertmode   = NOT_SET_VALUES;
2183 
2184   a->m = mat->m   = oldmat->m;
2185   a->n = mat->n   = oldmat->n;
2186   a->M = mat->M   = oldmat->M;
2187   a->N = mat->N   = oldmat->N;
2188 
2189   a->bs  = oldmat->bs;
2190   a->bs2 = oldmat->bs2;
2191   a->mbs = oldmat->mbs;
2192   a->nbs = oldmat->nbs;
2193   a->Mbs = oldmat->Mbs;
2194   a->Nbs = oldmat->Nbs;
2195 
2196   a->rstart       = oldmat->rstart;
2197   a->rend         = oldmat->rend;
2198   a->cstart       = oldmat->cstart;
2199   a->cend         = oldmat->cend;
2200   a->size         = oldmat->size;
2201   a->rank         = oldmat->rank;
2202   a->donotstash   = oldmat->donotstash;
2203   a->roworiented  = oldmat->roworiented;
2204   a->rowindices   = 0;
2205   a->rowvalues    = 0;
2206   a->getrowactive = PETSC_FALSE;
2207   a->barray       = 0;
2208   a->rstart_bs    = oldmat->rstart_bs;
2209   a->rend_bs      = oldmat->rend_bs;
2210   a->cstart_bs    = oldmat->cstart_bs;
2211   a->cend_bs      = oldmat->cend_bs;
2212 
2213   /* hash table stuff */
2214   a->ht           = 0;
2215   a->hd           = 0;
2216   a->ht_size      = 0;
2217   a->ht_flag      = oldmat->ht_flag;
2218   a->ht_fact      = oldmat->ht_fact;
2219   a->ht_total_ct  = 0;
2220   a->ht_insert_ct = 0;
2221 
2222 
2223   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2224   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2225   a->cowners    = a->rowners + a->size + 2;
2226   a->rowners_bs = a->cowners + a->size + 2;
2227   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2228   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2229   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2230   if (oldmat->colmap) {
2231 #if defined (PETSC_USE_CTABLE)
2232   ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2233 #else
2234     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2235     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2236     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2237 #endif
2238   } else a->colmap = 0;
2239   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2240     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2241     PLogObjectMemory(mat,len*sizeof(int));
2242     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2243   } else a->garray = 0;
2244 
2245   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2246   PLogObjectParent(mat,a->lvec);
2247   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2248 
2249   PLogObjectParent(mat,a->Mvctx);
2250   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2251   PLogObjectParent(mat,a->A);
2252   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2253   PLogObjectParent(mat,a->B);
2254   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2255   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2256   if (flg) {
2257     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2258   }
2259   *newmat = mat;
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #include "sys.h"
2264 
2265 #undef __FUNC__
2266 #define __FUNC__ "MatLoad_MPIBAIJ"
2267 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2268 {
2269   Mat          A;
2270   int          i, nz, ierr, j,rstart, rend, fd;
2271   Scalar       *vals,*buf;
2272   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2273   MPI_Status   status;
2274   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2275   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2276   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2277   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2278   int          dcount,kmax,k,nzcount,tmp;
2279 
2280   PetscFunctionBegin;
2281   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2282 
2283   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2284   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2285   if (!rank) {
2286     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2287     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2288     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2289     if (header[3] < 0) {
2290       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2291     }
2292   }
2293 
2294   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2295   M = header[1]; N = header[2];
2296 
2297   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2298 
2299   /*
2300      This code adds extra rows to make sure the number of rows is
2301      divisible by the blocksize
2302   */
2303   Mbs        = M/bs;
2304   extra_rows = bs - M + bs*(Mbs);
2305   if (extra_rows == bs) extra_rows = 0;
2306   else                  Mbs++;
2307   if (extra_rows &&!rank) {
2308     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2309   }
2310 
2311   /* determine ownership of all rows */
2312   mbs = Mbs/size + ((Mbs % size) > rank);
2313   m   = mbs * bs;
2314   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2315   browners = rowners + size + 1;
2316   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2317   rowners[0] = 0;
2318   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2319   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2320   rstart = rowners[rank];
2321   rend   = rowners[rank+1];
2322 
2323   /* distribute row lengths to all processors */
2324   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2325   if (!rank) {
2326     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2327     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2328     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2329     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2330     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2331     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2332     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2333   } else {
2334     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2335   }
2336 
2337   if (!rank) {
2338     /* calculate the number of nonzeros on each processor */
2339     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2340     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2341     for ( i=0; i<size; i++ ) {
2342       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2343         procsnz[i] += rowlengths[j];
2344       }
2345     }
2346     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2347 
2348     /* determine max buffer needed and allocate it */
2349     maxnz = 0;
2350     for ( i=0; i<size; i++ ) {
2351       maxnz = PetscMax(maxnz,procsnz[i]);
2352     }
2353     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2354 
2355     /* read in my part of the matrix column indices  */
2356     nz = procsnz[0];
2357     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2358     mycols = ibuf;
2359     if (size == 1)  nz -= extra_rows;
2360     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2361     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2362 
2363     /* read in every ones (except the last) and ship off */
2364     for ( i=1; i<size-1; i++ ) {
2365       nz   = procsnz[i];
2366       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2367       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2368     }
2369     /* read in the stuff for the last proc */
2370     if ( size != 1 ) {
2371       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2372       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2373       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2374       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2375     }
2376     ierr = PetscFree(cols);CHKERRQ(ierr);
2377   } else {
2378     /* determine buffer space needed for message */
2379     nz = 0;
2380     for ( i=0; i<m; i++ ) {
2381       nz += locrowlens[i];
2382     }
2383     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2384     mycols = ibuf;
2385     /* receive message of column indices*/
2386     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2387     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2388     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2389   }
2390 
2391   /* loop over local rows, determining number of off diagonal entries */
2392   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2393   odlens = dlens + (rend-rstart);
2394   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2395   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2396   masked1 = mask    + Mbs;
2397   masked2 = masked1 + Mbs;
2398   rowcount = 0; nzcount = 0;
2399   for ( i=0; i<mbs; i++ ) {
2400     dcount  = 0;
2401     odcount = 0;
2402     for ( j=0; j<bs; j++ ) {
2403       kmax = locrowlens[rowcount];
2404       for ( k=0; k<kmax; k++ ) {
2405         tmp = mycols[nzcount++]/bs;
2406         if (!mask[tmp]) {
2407           mask[tmp] = 1;
2408           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2409           else masked1[dcount++] = tmp;
2410         }
2411       }
2412       rowcount++;
2413     }
2414 
2415     dlens[i]  = dcount;
2416     odlens[i] = odcount;
2417 
2418     /* zero out the mask elements we set */
2419     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2420     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2421   }
2422 
2423   /* create our matrix */
2424   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2425   A = *newmat;
2426   MatSetOption(A,MAT_COLUMNS_SORTED);
2427 
2428   if (!rank) {
2429     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2430     /* read in my part of the matrix numerical values  */
2431     nz = procsnz[0];
2432     vals = buf;
2433     mycols = ibuf;
2434     if (size == 1)  nz -= extra_rows;
2435     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2436     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2437 
2438     /* insert into matrix */
2439     jj      = rstart*bs;
2440     for ( i=0; i<m; i++ ) {
2441       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2442       mycols += locrowlens[i];
2443       vals   += locrowlens[i];
2444       jj++;
2445     }
2446     /* read in other processors (except the last one) and ship out */
2447     for ( i=1; i<size-1; i++ ) {
2448       nz   = procsnz[i];
2449       vals = buf;
2450       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2451       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2452     }
2453     /* the last proc */
2454     if ( size != 1 ){
2455       nz   = procsnz[i] - extra_rows;
2456       vals = buf;
2457       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2458       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2459       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2460     }
2461     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2462   } else {
2463     /* receive numeric values */
2464     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2465 
2466     /* receive message of values*/
2467     vals   = buf;
2468     mycols = ibuf;
2469     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2470     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2471     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2472 
2473     /* insert into matrix */
2474     jj      = rstart*bs;
2475     for ( i=0; i<m; i++ ) {
2476       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2477       mycols += locrowlens[i];
2478       vals   += locrowlens[i];
2479       jj++;
2480     }
2481   }
2482   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2483   ierr = PetscFree(buf);CHKERRQ(ierr);
2484   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2485   ierr = PetscFree(rowners);CHKERRQ(ierr);
2486   ierr = PetscFree(dlens);CHKERRQ(ierr);
2487   ierr = PetscFree(mask);CHKERRQ(ierr);
2488   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2489   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 #undef __FUNC__
2494 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2495 /*@
2496    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2497 
2498    Input Parameters:
2499 .  mat  - the matrix
2500 .  fact - factor
2501 
2502    Collective on Mat
2503 
2504    Level: advanced
2505 
2506   Notes:
2507    This can also be set by the command line option: -mat_use_hash_table fact
2508 
2509 .keywords: matrix, hashtable, factor, HT
2510 
2511 .seealso: MatSetOption()
2512 @*/
2513 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2514 {
2515   Mat_MPIBAIJ *baij;
2516 
2517   PetscFunctionBegin;
2518   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2519   if (mat->type != MATMPIBAIJ) {
2520       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2521   }
2522   baij = (Mat_MPIBAIJ*) mat->data;
2523   baij->ht_fact = fact;
2524   PetscFunctionReturn(0);
2525 }
2526