xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ac2a4f0d24b3b6a4ee93edbcad41f4bb9e923944)
1 /*$Id: mpibaij.c,v 1.180 1999/10/13 20:37:30 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 extern int MatSetUpMultiply_MPIBAIJ(Mat);
7 extern int DisAssemble_MPIBAIJ(Mat);
8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 extern int MatPrintHelp_SeqBAIJ(Mat);
16 
17 EXTERN_C_BEGIN
18 #undef __FUNC__
19 #define __FUNC__ "MatStoreValues_MPIBAIJ"
20 int MatStoreValues_MPIBAIJ(Mat mat)
21 {
22   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
23   int         ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
27   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 EXTERN_C_END
31 
32 EXTERN_C_BEGIN
33 #undef __FUNC__
34 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
35 int MatRetrieveValues_MPIBAIJ(Mat mat)
36 {
37   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
38   int         ierr;
39 
40   PetscFunctionBegin;
41   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
42   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
43   PetscFunctionReturn(0);
44 }
45 EXTERN_C_END
46 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
/*
   Builds baij->colmap, mapping a global block-column number to the
   local point-column number (plus one; zero means "block column not
   present locally") within the off-diagonal block B.  With
   PETSC_USE_CTABLE a sparse table keyed by garray[i]+1 is used;
   otherwise a dense array of length Nbs (global number of block
   columns) is allocated and zeroed.
*/
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* key and value are both shifted by +1 so 0 never appears in the table */
  ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
75 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv):

   Inlined insertion of one point value into the "diagonal" SeqBAIJ
   block A.  row/col are point (not block) indices local to this
   process; the target block is (row/bs, col/bs) and the offset inside
   the bs x bs block is bs*cidx+ridx (column-major block storage).

   Relies on variables declared by the caller (MatSetValues_MPIBAIJ):
   a, ai, aj, aa, aimax, ailen, bs, bs2, the Mat A (for logging), and
   scratch variables rp,ap,bap,brow,bcol,ridx,cidx,low,high,t,_i,ii,
   nrow,rmax,N,ierr.

   A binary search narrows the sorted row to at most 3 candidates,
   followed by a linear scan.  If the block column is absent and
   a->nonew allows insertion, a single new allocation with CHUNKSIZE
   extra block slots is made when the row is full, and later entries
   are shifted up so the row stays sorted by block column.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one allocation holds a, j and i */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows after brow shift by CHUNKSIZE */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                          len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
155 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv):

   Identical in structure to MatSetValues_SeqBAIJ_A_Private() above,
   but operating on the off-diagonal SeqBAIJ block B (variables
   b, bi, bj, ba, bimax, bilen, and the Mat B for logging).  See the
   A-variant for a description of the search and reallocation logic.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: one allocation holds a, j and i */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows after brow shift by CHUNKSIZE */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
233 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues for MPIBAIJ using point (not block) indices.

   Locally owned rows are inserted immediately: columns inside the
   diagonal ownership range go to the SeqBAIJ block A, other local
   entries go to the off-diagonal block B after translating the global
   column through baij->colmap.  Rows owned by other processes are
   stashed (unless baij->donotstash) and communicated at assembly time.
   Negative row/column indices are silently ignored.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue; /* negative rows are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* row owned by this process */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){ /* column in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else { /* column in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap stores local block column*bs + 1 (0 means absent);
               translate global point column to point column within B */
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            /* NOTE(review): when the block column is absent colmap yields 0
               so col = in[j]%bs - 1; the test below only detects this when
               in[j]%bs == 0 -- confirm this is the intended trigger for
               disassembly */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* row owned elsewhere: stash for communication during assembly */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
316 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValues for MPIBAIJ using block indices: each of the m*n logical
   entries of v is a bs*bs block.  Locally owned block rows are handed to
   MatSetValuesBlocked_SeqBAIJ() on either A or B (global block columns
   translated through baij->colmap); other rows go to the block stash.
   baij->barray is a lazily allocated bs2-element scratch buffer used to
   repack one block contiguously when v does not already hold it that way.
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  /* distance (in Scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) { /* block row owned by this process */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into the contiguous scratch buffer */
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++  = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the packed block */
        }

        if (in[j] >= cstart && in[j] < cend){ /* block column in the diagonal part A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else { /* block column in the off-diagonal part B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
            /* colmap stores local point column + 1; convert back to a block column */
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else { /* block row owned elsewhere: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* Multiplicative hash: maps key into [0,size) using the fractional part
   of key*HASH_KEY (HASH_KEY is the golden-ratio conjugate, a standard
   multiplier choice).  tmp must be a double lvalue supplied by the
   caller; it is clobbered by the macro. */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValues_MPIBAIJ (point indices).  The
   nonzero block pattern is assumed fixed: each block (row/bs, col/bs)
   must already have an entry in baij->ht, whose parallel array baij->hd
   holds the address of the block's values.  Lookup is the HASH() probe
   followed by a linear scan that wraps once at the end of the table; a
   missing key is an error.  Non-local rows are stashed as in
   MatSetValues_MPIBAIJ.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  /* search-length counters used only for diagnostics */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* row owned by this process */
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table; keys are block indices, +1 so 0 means empty */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else { /* row owned elsewhere: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
496 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValuesBlocked_MPIBAIJ (block indices).
   As with the point version, the block pattern must already exist in
   baij->ht/baij->hd; each hit copies (or adds) a whole bs*bs block from
   v into the storage pointed to by hd.  Blocks stored in BAIJ format
   are column-major, hence the stride-bs inner loops for row-oriented
   input.  Non-local block rows are stashed in the block stash.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  /* search-length counters used only for diagnostics */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance (in Scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) { /* block row owned by this process */
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table; keys are global block indices, +1 so 0 means empty */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe to the end of the table, then wrap to the start */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            /* transpose from row-oriented input to column-major block storage */
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else { /* block row owned elsewhere: stash in the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
610 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves values at the given point indices; only locally owned rows
   are supported (non-local rows raise PETSC_ERR_SUP).  Columns in the
   diagonal ownership range are read from A; others are translated
   through baij->colmap and read from B, with 0.0 returned for entries
   whose block column is not stored on this process.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* row owned by this process */
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){ /* diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else { /* off-diagonal block B */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap yields local point column + 1; 0 (hence data<0 after the
             decrement) means the block column is not stored locally */
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
654 
655 #undef __FUNC__
656 #define __FUNC__ "MatNorm_MPIBAIJ"
657 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
658 {
659   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
660   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
661   int        ierr, i,bs2=baij->bs2;
662   double     sum = 0.0;
663   Scalar     *v;
664 
665   PetscFunctionBegin;
666   if (baij->size == 1) {
667     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
668   } else {
669     if (type == NORM_FROBENIUS) {
670       v = amat->a;
671       for (i=0; i<amat->nz*bs2; i++ ) {
672 #if defined(PETSC_USE_COMPLEX)
673         sum += PetscReal(PetscConj(*v)*(*v)); v++;
674 #else
675         sum += (*v)*(*v); v++;
676 #endif
677       }
678       v = bmat->a;
679       for (i=0; i<bmat->nz*bs2; i++ ) {
680 #if defined(PETSC_USE_COMPLEX)
681         sum += PetscReal(PetscConj(*v)*(*v)); v++;
682 #else
683         sum += (*v)*(*v); v++;
684 #endif
685       }
686       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
687       *norm = sqrt(*norm);
688     } else {
689       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
690     }
691   }
692   PetscFunctionReturn(0);
693 }
694 
695 
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
703 #undef __FUNC__
704 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
705 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
706 {
707   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
708   Mat         A = baij->A, B=baij->B;
709   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
710   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
711   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
712   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
713   int         *HT,key;
714   Scalar      **HD;
715   double      tmp;
716 #if defined(PETSC_USE_BOPT_g)
717   int         ct=0,max=0;
718 #endif
719 
720   PetscFunctionBegin;
721   baij->ht_size=(int)(factor*nz);
722   size = baij->ht_size;
723 
724   if (baij->ht) {
725     PetscFunctionReturn(0);
726   }
727 
728   /* Allocate Memory for Hash Table */
729   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
730   baij->ht = (int*)(baij->hd + size);
731   HD = baij->hd;
732   HT = baij->ht;
733 
734 
735   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
736 
737 
738   /* Loop Over A */
739   for ( i=0; i<a->mbs; i++ ) {
740     for ( j=ai[i]; j<ai[i+1]; j++ ) {
741       row = i+rstart;
742       col = aj[j]+cstart;
743 
744       key = row*Nbs + col + 1;
745       h1  = HASH(size,key,tmp);
746       for ( k=0; k<size; k++ ){
747         if (HT[(h1+k)%size] == 0.0) {
748           HT[(h1+k)%size] = key;
749           HD[(h1+k)%size] = a->a + j*bs2;
750           break;
751 #if defined(PETSC_USE_BOPT_g)
752         } else {
753           ct++;
754 #endif
755         }
756       }
757 #if defined(PETSC_USE_BOPT_g)
758       if (k> max) max = k;
759 #endif
760     }
761   }
762   /* Loop Over B */
763   for ( i=0; i<b->mbs; i++ ) {
764     for ( j=bi[i]; j<bi[i+1]; j++ ) {
765       row = i+rstart;
766       col = garray[bj[j]];
767       key = row*Nbs + col + 1;
768       h1  = HASH(size,key,tmp);
769       for ( k=0; k<size; k++ ){
770         if (HT[(h1+k)%size] == 0.0) {
771           HT[(h1+k)%size] = key;
772           HD[(h1+k)%size] = b->a + j*bs2;
773           break;
774 #if defined(PETSC_USE_BOPT_g)
775         } else {
776           ct++;
777 #endif
778         }
779       }
780 #if defined(PETSC_USE_BOPT_g)
781       if (k> max) max = k;
782 #endif
783     }
784   }
785 
786   /* Print Summary */
787 #if defined(PETSC_USE_BOPT_g)
788   for ( i=0,j=0; i<size; i++)
789     if (HT[i]) {j++;}
790   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
791            (j== 0)? 0.0:((double)(ct+j))/j,max);
792 #endif
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNC__
797 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
798 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
799 {
800   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
801   int         ierr,nstash,reallocs;
802   InsertMode  addv;
803 
804   PetscFunctionBegin;
805   if (baij->donotstash) {
806     PetscFunctionReturn(0);
807   }
808 
809   /* make sure all processors are either in INSERTMODE or ADDMODE */
810   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
811   if (addv == (ADD_VALUES|INSERT_VALUES)) {
812     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
813   }
814   mat->insertmode = addv; /* in case this processor had no cache */
815 
816   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
817   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
818   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
819   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822   PetscFunctionReturn(0);
823 }
824 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: drains the point stash and
   the block stash of off-processor entries received from other processors,
   assembles the local diagonal (A) and off-diagonal (B) blocks, handles
   disassembly consistency across processors, and (on final assembly) sets
   up the scatter context and, optionally, the hash table for SetValues.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each message holds parallel row/col/val arrays */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block is bs2 scalars long, hence the i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset hash-table search statistics (debug builds only) */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on first final assembly with the hash-table option set, build the table
     and switch the SetValues kernels to their hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release work space that MatGetRow_MPIBAIJ may have allocated */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
923 
924 /*
925 #undef __FUNC__
926 #define __FUNC__ "MatView_MPIBAIJ_Binary"
927 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
928 {
929   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
930   int          ierr;
931 
932   PetscFunctionBegin;
933   if (baij->size == 1) {
934     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
935   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
936   PetscFunctionReturn(0);
937 }
938 */
939 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Viewing kernel for MPIBAIJ matrices.
   The ASCII "info" formats are printed directly per processor; for any other
   case the whole matrix is gathered into a temporary matrix whose local part
   lives on processor 0, which then views it sequentially.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-processor nonzero and memory statistics; nz counts are in
         blocks, hence the *bs scaling when printing */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* nothing to do if the draw context is a null device */
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* rank 0 owns all M x N rows of the gathered matrix, others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqBAIJ*) baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      /* rvals holds the bs consecutive global (unblocked) row indices of
         block row i; local block column aj[j] maps to global via cstart */
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          /* insert one column of the bs x bs block; a advances through
             the column-oriented block storage */
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        /* off-diagonal block columns map to global through garray */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1044 
1045 
1046 
1047 #undef __FUNC__
1048 #define __FUNC__ "MatView_MPIBAIJ"
1049 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1050 {
1051   int        ierr;
1052   PetscTruth isascii,isdraw,issocket,isbinary;
1053 
1054   PetscFunctionBegin;
1055   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1056   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1057   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1059   if (isascii || isdraw || issocket || isbinary) {
1060     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1061   } else {
1062     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1063   }
1064   PetscFunctionReturn(0);
1065 }
1066 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage associated with an MPIBAIJ matrix:
   mappings, stashes, the two sequential blocks, the column map, scatter
   context and assorted work arrays, then the header itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  /* both the point stash and the block stash */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable when CTABLE is enabled, a plain array otherwise */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd is the hash-table data array; NOTE(review): the ht array itself is
     not freed here - confirm whether it is released elsewhere */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1114 
1115 #undef __FUNC__
1116 #define __FUNC__ "MatMult_MPIBAIJ"
1117 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1118 {
1119   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1120   int         ierr, nt;
1121 
1122   PetscFunctionBegin;
1123   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1124   if (nt != a->n) {
1125     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1126   }
1127   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1128   if (nt != a->m) {
1129     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1130   }
1131   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1132   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1133   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1134   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1135   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIBAIJ"
/*
   MatMultAdd_MPIBAIJ - Computes zz = A*xx + yy, overlapping the scatter of
   the off-processor values of xx with the local (diagonal block) multiply.
*/
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* begin gathering ghost values of xx into lvec */
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* zz = (diag block)*xx + yy using only local data */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* add the off-diagonal block applied to the ghost values */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1153 
#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIBAIJ"
/*
   MatMultTrans_MPIBAIJ - Computes yy = A^T * xx.  The off-diagonal
   contribution is computed into lvec and scattered back (reverse, adding)
   while the local transpose multiply proceeds.
*/
int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1174 
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   MatMultTransAdd_MPIBAIJ - Computes zz = A^T * xx + yy, overlapping the
   reverse scatter of the off-diagonal contribution with the local multiply.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1195 
1196 /*
1197   This only works correctly for square matrices where the subblock A->A is the
1198    diagonal block
1199 */
1200 #undef __FUNC__
1201 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1202 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1203 {
1204   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1205   int         ierr;
1206 
1207   PetscFunctionBegin;
1208   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1209   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNC__
1214 #define __FUNC__ "MatScale_MPIBAIJ"
1215 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1216 {
1217   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1218   int         ierr;
1219 
1220   PetscFunctionBegin;
1221   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1222   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNC__
1227 #define __FUNC__ "MatGetSize_MPIBAIJ"
1228 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1229 {
1230   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1231 
1232   PetscFunctionBegin;
1233   if (m) *m = mat->M;
1234   if (n) *n = mat->N;
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNC__
1239 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1240 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1241 {
1242   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1243 
1244   PetscFunctionBegin;
1245   *m = mat->m; *n = mat->n;
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNC__
1250 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1251 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1252 {
1253   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1254 
1255   PetscFunctionBegin;
1256   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1257   PetscFunctionReturn(0);
1258 }
1259 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one (local) global row of the matrix as a
   merged, column-sorted list of entries from the diagonal (A) and
   off-diagonal (B) blocks.  Work space is cached on the matrix and freed
   at the next assembly; MatRestoreRow must be called afterwards.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* total blocks in block row i across both A and B parts */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and the column indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* pass null sub-pointers for whatever the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark marks where the B columns split: those before cstart come
         first, then all A columns, then the rest of B */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed above while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1340 
1341 #undef __FUNC__
1342 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1343 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1344 {
1345   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1346 
1347   PetscFunctionBegin;
1348   if (baij->getrowactive == PETSC_FALSE) {
1349     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1350   }
1351   baij->getrowactive = PETSC_FALSE;
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNC__
1356 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1357 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1358 {
1359   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1360 
1361   PetscFunctionBegin;
1362   *bs = baij->bs;
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNC__
1367 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1368 int MatZeroEntries_MPIBAIJ(Mat A)
1369 {
1370   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1371   int         ierr;
1372 
1373   PetscFunctionBegin;
1374   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1375   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNC__
1380 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1381 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1382 {
1383   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1384   Mat         A = a->A, B = a->B;
1385   int         ierr;
1386   double      isend[5], irecv[5];
1387 
1388   PetscFunctionBegin;
1389   info->block_size     = (double)a->bs;
1390   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1391   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1392   isend[3] = info->memory;  isend[4] = info->mallocs;
1393   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1394   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1395   isend[3] += info->memory;  isend[4] += info->mallocs;
1396   if (flag == MAT_LOCAL) {
1397     info->nz_used      = isend[0];
1398     info->nz_allocated = isend[1];
1399     info->nz_unneeded  = isend[2];
1400     info->memory       = isend[3];
1401     info->mallocs      = isend[4];
1402   } else if (flag == MAT_GLOBAL_MAX) {
1403     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1404     info->nz_used      = irecv[0];
1405     info->nz_allocated = irecv[1];
1406     info->nz_unneeded  = irecv[2];
1407     info->memory       = irecv[3];
1408     info->mallocs      = irecv[4];
1409   } else if (flag == MAT_GLOBAL_SUM) {
1410     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1411     info->nz_used      = irecv[0];
1412     info->nz_allocated = irecv[1];
1413     info->nz_unneeded  = irecv[2];
1414     info->memory       = irecv[3];
1415     info->mallocs      = irecv[4];
1416   } else {
1417     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1418   }
1419   info->rows_global       = (double)a->M;
1420   info->columns_global    = (double)a->N;
1421   info->rows_local        = (double)a->m;
1422   info->columns_local     = (double)a->N;
1423   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1424   info->fill_ratio_needed = 0;
1425   info->factor_mallocs    = 0;
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNC__
1430 #define __FUNC__ "MatSetOption_MPIBAIJ"
1431 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1432 {
1433   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1434   int         ierr;
1435 
1436   PetscFunctionBegin;
1437   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1438       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1439       op == MAT_COLUMNS_UNSORTED ||
1440       op == MAT_COLUMNS_SORTED ||
1441       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1442       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1443         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1444         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1445   } else if (op == MAT_ROW_ORIENTED) {
1446         a->roworiented = 1;
1447         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1448         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1449   } else if (op == MAT_ROWS_SORTED ||
1450              op == MAT_ROWS_UNSORTED ||
1451              op == MAT_SYMMETRIC ||
1452              op == MAT_STRUCTURALLY_SYMMETRIC ||
1453              op == MAT_YES_NEW_DIAGONALS ||
1454              op == MAT_USE_HASH_TABLE) {
1455     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1456   } else if (op == MAT_COLUMN_ORIENTED) {
1457     a->roworiented = 0;
1458     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1459     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1460   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1461     a->donotstash = 1;
1462   } else if (op == MAT_NO_NEW_DIAGONALS) {
1463     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1464   } else if (op == MAT_USE_HASH_TABLE) {
1465     a->ht_flag = 1;
1466   } else {
1467     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1468   }
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNC__
1473 #define __FUNC__ "MatTranspose_MPIBAIJ("
1474 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1475 {
1476   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1477   Mat_SeqBAIJ *Aloc;
1478   Mat        B;
1479   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1480   int        bs=baij->bs,mbs=baij->mbs;
1481   Scalar     *a;
1482 
1483   PetscFunctionBegin;
1484   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1485   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1486 CHKERRQ(ierr);
1487 
1488   /* copy over the A part */
1489   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1490   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1491   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1492 
1493   for ( i=0; i<mbs; i++ ) {
1494     rvals[0] = bs*(baij->rstart + i);
1495     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1496     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1497       col = (baij->cstart+aj[j])*bs;
1498       for (k=0; k<bs; k++ ) {
1499         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1500         col++; a += bs;
1501       }
1502     }
1503   }
1504   /* copy over the B part */
1505   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1506   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1507   for ( i=0; i<mbs; i++ ) {
1508     rvals[0] = bs*(baij->rstart + i);
1509     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1510     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1511       col = baij->garray[aj[j]]*bs;
1512       for (k=0; k<bs; k++ ) {
1513         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1514         col++; a += bs;
1515       }
1516     }
1517   }
1518   ierr = PetscFree(rvals);CHKERRQ(ierr);
1519   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1520   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1521 
1522   if (matout != PETSC_NULL) {
1523     *matout = B;
1524   } else {
1525     PetscOps *Abops;
1526     MatOps   Aops;
1527 
1528     /* This isn't really an in-place transpose .... but free data structures from baij */
1529     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
1530     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1531     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1532 #if defined (PETSC_USE_CTABLE)
1533     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1534 #else
1535     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1536 #endif
1537     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1538     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1539     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1540     ierr = PetscFree(baij);CHKERRQ(ierr);
1541 
1542     /*
1543        This is horrible, horrible code. We need to keep the
1544       A pointers for the bops and ops but copy everything
1545       else from C.
1546     */
1547     Abops   = A->bops;
1548     Aops    = A->ops;
1549     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1550     A->bops = Abops;
1551     A->ops  = Aops;
1552 
1553     PetscHeaderDestroy(B);
1554   }
1555   PetscFunctionReturn(0);
1556 }
1557 
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes diag(ll) * mat * diag(rr).  Either
   vector may be null to skip that side.  The scatter of rr's values for
   the off-diagonal block is overlapped with the other scaling work.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         a = baij->A, b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local to this processor) */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1590 
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (global, point-indexed) rows listed in is,
   optionally placing *diag on the diagonal of each zeroed row.

   The rows in is may be owned by any process, so the routine first runs an
   all-to-all rendezvous: each process determines the owner of every index it
   holds, ships the indices to the owning processes, and collects the indices
   it owns itself into the local list lrows.  The actual zeroing is then done
   on the local diagonal (l->A) and off-diagonal (l->B) blocks.

   Collective on A.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    /* owners[] is in block units; scale by bs to compare point indices */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  /* after the reduction, work[rank] holds the longest message this process
     will receive and work[size+rank] the number of incoming messages
     (semantics of the custom PetscMaxSum_Op reduction) */
  work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices by destination; this loop advances starts[],
     which is why it is recomputed below before posting the sends */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first point row owned by this process; used to localize indices below */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert diagonal entries
       one at a time (requires that new nonzero locations be allowed) */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues may be safely freed */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1742 
1743 #undef __FUNC__
1744 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1745 int MatPrintHelp_MPIBAIJ(Mat A)
1746 {
1747   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1748   MPI_Comm    comm = A->comm;
1749   static int  called = 0;
1750   int         ierr;
1751 
1752   PetscFunctionBegin;
1753   if (!a->rank) {
1754     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1755   }
1756   if (called) {PetscFunctionReturn(0);} else called = 1;
1757   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1758   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #undef __FUNC__
1763 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1764 int MatSetUnfactored_MPIBAIJ(Mat A)
1765 {
1766   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1767   int         ierr;
1768 
1769   PetscFunctionBegin;
1770   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1775 
1776 #undef __FUNC__
1777 #define __FUNC__ "MatEqual_MPIBAIJ"
1778 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1779 {
1780   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1781   Mat         a, b, c, d;
1782   PetscTruth  flg;
1783   int         ierr;
1784 
1785   PetscFunctionBegin;
1786   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1787   a = matA->A; b = matA->B;
1788   c = matB->A; d = matB->B;
1789 
1790   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1791   if (flg == PETSC_TRUE) {
1792     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1793   }
1794   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1795   PetscFunctionReturn(0);
1796 }
1797 
/* -------------------------------------------------------------------*/
/*
   MatOps_Values - Virtual function table for the MATMPIBAIJ type.
   It is copied into B->ops by MatCreateMPIBAIJ() and
   MatDuplicate_MPIBAIJ().  The entries are positional: their order is
   fixed by the struct _MatOps declaration, so do not reorder them.
   A 0 entry marks an operation that is not implemented for this type.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1866 
1867 
1868 EXTERN_C_BEGIN
1869 #undef __FUNC__
1870 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1871 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1872 {
1873   PetscFunctionBegin;
1874   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1875   *iscopy = PETSC_FALSE;
1876   PetscFunctionReturn(0);
1877 }
1878 EXTERN_C_END
1879 
#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  /* the command-line option overrides the bs argument */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* on one process (unless overridden by -mat_mpi/-mat_mpibaij) fall back
     to the sequential BAIJ format; the off-diagonal preallocation arguments
     are not needed in that case */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* consistency checks on the local/global size combinations */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, recompute the global size
     from the local sizes (forces the two to be consistent) */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    /* distribute the block rows as evenly as possible; the first
       (Mbs % size) processes get one extra block row */
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation holds three (size+2)-long arrays: rowners and cowners in
     block units and rowners_bs in point units */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  /* convert per-process counts into a prefix sum of ownership offsets */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* the off-diagonal block B is created with the full global column width N;
     it is compressed later by MatSetUpMultiply_MPIBAIJ() */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the type-specific operations reachable by name through
     PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2161 
2162 #undef __FUNC__
2163 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2164 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2165 {
2166   Mat         mat;
2167   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2168   int         ierr, len=0, flg;
2169 
2170   PetscFunctionBegin;
2171   *newmat       = 0;
2172   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2173   PLogObjectCreate(mat);
2174   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2175   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2176   mat->ops->destroy = MatDestroy_MPIBAIJ;
2177   mat->ops->view    = MatView_MPIBAIJ;
2178   mat->factor       = matin->factor;
2179   mat->assembled    = PETSC_TRUE;
2180   mat->insertmode   = NOT_SET_VALUES;
2181 
2182   a->m = mat->m   = oldmat->m;
2183   a->n = mat->n   = oldmat->n;
2184   a->M = mat->M   = oldmat->M;
2185   a->N = mat->N   = oldmat->N;
2186 
2187   a->bs  = oldmat->bs;
2188   a->bs2 = oldmat->bs2;
2189   a->mbs = oldmat->mbs;
2190   a->nbs = oldmat->nbs;
2191   a->Mbs = oldmat->Mbs;
2192   a->Nbs = oldmat->Nbs;
2193 
2194   a->rstart       = oldmat->rstart;
2195   a->rend         = oldmat->rend;
2196   a->cstart       = oldmat->cstart;
2197   a->cend         = oldmat->cend;
2198   a->size         = oldmat->size;
2199   a->rank         = oldmat->rank;
2200   a->donotstash   = oldmat->donotstash;
2201   a->roworiented  = oldmat->roworiented;
2202   a->rowindices   = 0;
2203   a->rowvalues    = 0;
2204   a->getrowactive = PETSC_FALSE;
2205   a->barray       = 0;
2206   a->rstart_bs    = oldmat->rstart_bs;
2207   a->rend_bs      = oldmat->rend_bs;
2208   a->cstart_bs    = oldmat->cstart_bs;
2209   a->cend_bs      = oldmat->cend_bs;
2210 
2211   /* hash table stuff */
2212   a->ht           = 0;
2213   a->hd           = 0;
2214   a->ht_size      = 0;
2215   a->ht_flag      = oldmat->ht_flag;
2216   a->ht_fact      = oldmat->ht_fact;
2217   a->ht_total_ct  = 0;
2218   a->ht_insert_ct = 0;
2219 
2220 
2221   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2222   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2223   a->cowners    = a->rowners + a->size + 2;
2224   a->rowners_bs = a->cowners + a->size + 2;
2225   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2226   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2227   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2228   if (oldmat->colmap) {
2229 #if defined (PETSC_USE_CTABLE)
2230   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2231 #else
2232     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2233     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2234     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2235 #endif
2236   } else a->colmap = 0;
2237   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2238     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2239     PLogObjectMemory(mat,len*sizeof(int));
2240     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2241   } else a->garray = 0;
2242 
2243   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2244   PLogObjectParent(mat,a->lvec);
2245   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2246 
2247   PLogObjectParent(mat,a->Mvctx);
2248   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2249   PLogObjectParent(mat,a->A);
2250   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2251   PLogObjectParent(mat,a->B);
2252   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2253   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2254   if (flg) {
2255     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2256   }
2257   *newmat = mat;
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 #include "sys.h"
2262 
2263 #undef __FUNC__
2264 #define __FUNC__ "MatLoad_MPIBAIJ"
2265 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2266 {
2267   Mat          A;
2268   int          i, nz, ierr, j,rstart, rend, fd;
2269   Scalar       *vals,*buf;
2270   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2271   MPI_Status   status;
2272   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2273   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2274   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2275   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2276   int          dcount,kmax,k,nzcount,tmp;
2277 
2278   PetscFunctionBegin;
2279   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2280 
2281   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2282   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2283   if (!rank) {
2284     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2285     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2286     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2287     if (header[3] < 0) {
2288       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2289     }
2290   }
2291 
2292   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2293   M = header[1]; N = header[2];
2294 
2295   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2296 
2297   /*
2298      This code adds extra rows to make sure the number of rows is
2299      divisible by the blocksize
2300   */
2301   Mbs        = M/bs;
2302   extra_rows = bs - M + bs*(Mbs);
2303   if (extra_rows == bs) extra_rows = 0;
2304   else                  Mbs++;
2305   if (extra_rows &&!rank) {
2306     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2307   }
2308 
2309   /* determine ownership of all rows */
2310   mbs = Mbs/size + ((Mbs % size) > rank);
2311   m   = mbs * bs;
2312   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2313   browners = rowners + size + 1;
2314   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2315   rowners[0] = 0;
2316   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2317   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2318   rstart = rowners[rank];
2319   rend   = rowners[rank+1];
2320 
2321   /* distribute row lengths to all processors */
2322   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2323   if (!rank) {
2324     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2325     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2326     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2327     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2328     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2329     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2330     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2331   } else {
2332     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2333   }
2334 
2335   if (!rank) {
2336     /* calculate the number of nonzeros on each processor */
2337     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2338     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2339     for ( i=0; i<size; i++ ) {
2340       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2341         procsnz[i] += rowlengths[j];
2342       }
2343     }
2344     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2345 
2346     /* determine max buffer needed and allocate it */
2347     maxnz = 0;
2348     for ( i=0; i<size; i++ ) {
2349       maxnz = PetscMax(maxnz,procsnz[i]);
2350     }
2351     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2352 
2353     /* read in my part of the matrix column indices  */
2354     nz = procsnz[0];
2355     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2356     mycols = ibuf;
2357     if (size == 1)  nz -= extra_rows;
2358     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2359     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2360 
2361     /* read in every ones (except the last) and ship off */
2362     for ( i=1; i<size-1; i++ ) {
2363       nz   = procsnz[i];
2364       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2365       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2366     }
2367     /* read in the stuff for the last proc */
2368     if ( size != 1 ) {
2369       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2370       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2371       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2372       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2373     }
2374     ierr = PetscFree(cols);CHKERRQ(ierr);
2375   } else {
2376     /* determine buffer space needed for message */
2377     nz = 0;
2378     for ( i=0; i<m; i++ ) {
2379       nz += locrowlens[i];
2380     }
2381     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2382     mycols = ibuf;
2383     /* receive message of column indices*/
2384     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2385     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2386     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2387   }
2388 
2389   /* loop over local rows, determining number of off diagonal entries */
2390   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2391   odlens = dlens + (rend-rstart);
2392   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2393   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2394   masked1 = mask    + Mbs;
2395   masked2 = masked1 + Mbs;
2396   rowcount = 0; nzcount = 0;
2397   for ( i=0; i<mbs; i++ ) {
2398     dcount  = 0;
2399     odcount = 0;
2400     for ( j=0; j<bs; j++ ) {
2401       kmax = locrowlens[rowcount];
2402       for ( k=0; k<kmax; k++ ) {
2403         tmp = mycols[nzcount++]/bs;
2404         if (!mask[tmp]) {
2405           mask[tmp] = 1;
2406           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2407           else masked1[dcount++] = tmp;
2408         }
2409       }
2410       rowcount++;
2411     }
2412 
2413     dlens[i]  = dcount;
2414     odlens[i] = odcount;
2415 
2416     /* zero out the mask elements we set */
2417     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2418     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2419   }
2420 
2421   /* create our matrix */
2422   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2423   A = *newmat;
2424   MatSetOption(A,MAT_COLUMNS_SORTED);
2425 
2426   if (!rank) {
2427     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2428     /* read in my part of the matrix numerical values  */
2429     nz = procsnz[0];
2430     vals = buf;
2431     mycols = ibuf;
2432     if (size == 1)  nz -= extra_rows;
2433     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2434     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2435 
2436     /* insert into matrix */
2437     jj      = rstart*bs;
2438     for ( i=0; i<m; i++ ) {
2439       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2440       mycols += locrowlens[i];
2441       vals   += locrowlens[i];
2442       jj++;
2443     }
2444     /* read in other processors (except the last one) and ship out */
2445     for ( i=1; i<size-1; i++ ) {
2446       nz   = procsnz[i];
2447       vals = buf;
2448       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2449       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2450     }
2451     /* the last proc */
2452     if ( size != 1 ){
2453       nz   = procsnz[i] - extra_rows;
2454       vals = buf;
2455       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2456       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2457       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2458     }
2459     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2460   } else {
2461     /* receive numeric values */
2462     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2463 
2464     /* receive message of values*/
2465     vals   = buf;
2466     mycols = ibuf;
2467     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2468     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2469     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2470 
2471     /* insert into matrix */
2472     jj      = rstart*bs;
2473     for ( i=0; i<m; i++ ) {
2474       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2475       mycols += locrowlens[i];
2476       vals   += locrowlens[i];
2477       jj++;
2478     }
2479   }
2480   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2481   ierr = PetscFree(buf);CHKERRQ(ierr);
2482   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2483   ierr = PetscFree(rowners);CHKERRQ(ierr);
2484   ierr = PetscFree(dlens);CHKERRQ(ierr);
2485   ierr = PetscFree(mask);CHKERRQ(ierr);
2486   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2487   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #undef __FUNC__
2492 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2493 /*@
2494    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2495 
2496    Input Parameters:
2497 .  mat  - the matrix
2498 .  fact - factor
2499 
2500    Collective on Mat
2501 
2502    Level: advanced
2503 
2504   Notes:
2505    This can also be set by the command line option: -mat_use_hash_table fact
2506 
2507 .keywords: matrix, hashtable, factor, HT
2508 
2509 .seealso: MatSetOption()
2510 @*/
2511 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2512 {
2513   Mat_MPIBAIJ *baij;
2514 
2515   PetscFunctionBegin;
2516   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2517   if (mat->type != MATMPIBAIJ) {
2518     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2519   }
2520   baij = (Mat_MPIBAIJ*) mat->data;
2521   baij->ht_fact = fact;
2522   PetscFunctionReturn(0);
2523 }
2524