xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 0f5bd95cca42d693ada6d048329ab39533680180)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.179 1999/10/01 21:21:30 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
48 
49 /*
50      Local utility routine that creates a mapping from the global column
51    number to the local number in the off-diagonal part of the local
52    storage of the matrix.  This is done in a non scable way since the
53    length of colmap equals the global matrix length.
54 */
55 #undef __FUNC__
56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
57 static int CreateColmap_MPIBAIJ_Private(Mat mat)
58 {
59   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
60   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
61   int         nbs = B->nbs,i,bs=B->bs,ierr;
62 
63   PetscFunctionBegin;
64 #if defined (PETSC_USE_CTABLE)
65   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
66   for ( i=0; i<nbs; i++ ){
67     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
68   }
69 #else
70   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
71   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
72   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
73   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
74 #endif
75   PetscFunctionReturn(0);
76 }
77 
78 #define CHUNKSIZE  10
79 
80 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
81 { \
82  \
83     brow = row/bs;  \
84     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
85     rmax = aimax[brow]; nrow = ailen[brow]; \
86       bcol = col/bs; \
87       ridx = row % bs; cidx = col % bs; \
88       low = 0; high = nrow; \
89       while (high-low > 3) { \
90         t = (low+high)/2; \
91         if (rp[t] > bcol) high = t; \
92         else              low  = t; \
93       } \
94       for ( _i=low; _i<high; _i++ ) { \
95         if (rp[_i] > bcol) break; \
96         if (rp[_i] == bcol) { \
97           bap  = ap +  bs2*_i + bs*cidx + ridx; \
98           if (addv == ADD_VALUES) *bap += value;  \
99           else                    *bap  = value;  \
100           goto a_noinsert; \
101         } \
102       } \
103       if (a->nonew == 1) goto a_noinsert; \
104       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
105       if (nrow >= rmax) { \
106         /* there is no extra room in row, therefore enlarge */ \
107         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
108         Scalar *new_a; \
109  \
110         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
111  \
112         /* malloc new storage space */ \
113         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
114         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
115         new_j   = (int *) (new_a + bs2*new_nz); \
116         new_i   = new_j + new_nz; \
117  \
118         /* copy over old data into new slots */ \
119         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
120         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
121         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
122         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
123         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
124                           len*sizeof(int));CHKERRQ(ierr); \
125         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
126         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
127         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
128                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
129         /* free up old matrix storage */ \
130         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
131         if (!a->singlemalloc) { \
132           ierr = PetscFree(a->i);CHKERRQ(ierr); \
133           ierr = PetscFree(a->j);CHKERRQ(ierr);\
134         } \
135         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
136         a->singlemalloc = 1; \
137  \
138         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
139         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
140         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
141         a->maxnz += bs2*CHUNKSIZE; \
142         a->reallocs++; \
143         a->nz++; \
144       } \
145       N = nrow++ - 1;  \
146       /* shift up all the later entries in this row */ \
147       for ( ii=N; ii>=_i; ii-- ) { \
148         rp[ii+1] = rp[ii]; \
149         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
150       } \
151       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
152       rp[_i]                      = bcol;  \
153       ap[bs2*_i + bs*cidx + ridx] = value;  \
154       a_noinsert:; \
155     ailen[brow] = nrow; \
156 }
157 
158 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
159 { \
160  \
161     brow = row/bs;  \
162     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
163     rmax = bimax[brow]; nrow = bilen[brow]; \
164       bcol = col/bs; \
165       ridx = row % bs; cidx = col % bs; \
166       low = 0; high = nrow; \
167       while (high-low > 3) { \
168         t = (low+high)/2; \
169         if (rp[t] > bcol) high = t; \
170         else              low  = t; \
171       } \
172       for ( _i=low; _i<high; _i++ ) { \
173         if (rp[_i] > bcol) break; \
174         if (rp[_i] == bcol) { \
175           bap  = ap +  bs2*_i + bs*cidx + ridx; \
176           if (addv == ADD_VALUES) *bap += value;  \
177           else                    *bap  = value;  \
178           goto b_noinsert; \
179         } \
180       } \
181       if (b->nonew == 1) goto b_noinsert; \
182       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
183       if (nrow >= rmax) { \
184         /* there is no extra room in row, therefore enlarge */ \
185         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
186         Scalar *new_a; \
187  \
188         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
189  \
190         /* malloc new storage space */ \
191         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
192         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
193         new_j   = (int *) (new_a + bs2*new_nz); \
194         new_i   = new_j + new_nz; \
195  \
196         /* copy over old data into new slots */ \
197         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
198         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
199         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
200         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
201         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
202                            len*sizeof(int));CHKERRQ(ierr); \
203         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
204         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
205         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
206                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
207         /* free up old matrix storage */ \
208         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
209         if (!b->singlemalloc) { \
210           ierr = PetscFree(b->i);CHKERRQ(ierr); \
211           ierr = PetscFree(b->j);CHKERRQ(ierr); \
212         } \
213         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
214         b->singlemalloc = 1; \
215  \
216         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
217         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
218         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
219         b->maxnz += bs2*CHUNKSIZE; \
220         b->reallocs++; \
221         b->nz++; \
222       } \
223       N = nrow++ - 1;  \
224       /* shift up all the later entries in this row */ \
225       for ( ii=N; ii>=_i; ii-- ) { \
226         rp[ii+1] = rp[ii]; \
227         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
228       } \
229       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
230       rp[_i]                      = bcol;  \
231       ap[bs2*_i + bs*cidx + ridx] = value;  \
232       b_noinsert:; \
233     bilen[brow] = nrow; \
234 }
235 
236 #undef __FUNC__
237 #define __FUNC__ "MatSetValues_MPIBAIJ"
238 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
239 {
240   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
241   Scalar      value;
242   int         ierr,i,j,row,col;
243   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
244   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
245   int         cend_orig=baij->cend_bs,bs=baij->bs;
246 
247   /* Some Variables required in the macro */
248   Mat         A = baij->A;
249   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
250   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
251   Scalar      *aa=a->a;
252 
253   Mat         B = baij->B;
254   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
255   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
256   Scalar      *ba=b->a;
257 
258   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
259   int         low,high,t,ridx,cidx,bs2=a->bs2;
260   Scalar      *ap,*bap;
261 
262   PetscFunctionBegin;
263   for ( i=0; i<m; i++ ) {
264     if (im[i] < 0) continue;
265 #if defined(PETSC_USE_BOPT_g)
266     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
267 #endif
268     if (im[i] >= rstart_orig && im[i] < rend_orig) {
269       row = im[i] - rstart_orig;
270       for ( j=0; j<n; j++ ) {
271         if (in[j] >= cstart_orig && in[j] < cend_orig){
272           col = in[j] - cstart_orig;
273           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
274           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
275           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
276         } else if (in[j] < 0) continue;
277 #if defined(PETSC_USE_BOPT_g)
278         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
279 #endif
280         else {
281           if (mat->was_assembled) {
282             if (!baij->colmap) {
283               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
284             }
285 #if defined (PETSC_USE_CTABLE)
286             ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
287             col  = col - 1 + in[j]%bs;
288 #else
289             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
290 #endif
291             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
292               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
293               col =  in[j];
294               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
295               B = baij->B;
296               b = (Mat_SeqBAIJ *) (B)->data;
297               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
298               ba=b->a;
299             }
300           } else col = in[j];
301           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
302           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
303           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
304         }
305       }
306     } else {
307       if ( !baij->donotstash) {
308         if (roworiented) {
309           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
310         } else {
311           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
312         }
313       }
314     }
315   }
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
321 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
322 {
323   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
324   Scalar      *value,*barray=baij->barray;
325   int         ierr,i,j,ii,jj,row,col;
326   int         roworiented = baij->roworiented,rstart=baij->rstart ;
327   int         rend=baij->rend,cstart=baij->cstart,stepval;
328   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
329 
330   PetscFunctionBegin;
331   if(!barray) {
332     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
333   }
334 
335   if (roworiented) {
336     stepval = (n-1)*bs;
337   } else {
338     stepval = (m-1)*bs;
339   }
340   for ( i=0; i<m; i++ ) {
341     if (im[i] < 0) continue;
342 #if defined(PETSC_USE_BOPT_g)
343     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
344 #endif
345     if (im[i] >= rstart && im[i] < rend) {
346       row = im[i] - rstart;
347       for ( j=0; j<n; j++ ) {
348         /* If NumCol = 1 then a copy is not required */
349         if ((roworiented) && (n == 1)) {
350           barray = v + i*bs2;
351         } else if((!roworiented) && (m == 1)) {
352           barray = v + j*bs2;
353         } else { /* Here a copy is required */
354           if (roworiented) {
355             value = v + i*(stepval+bs)*bs + j*bs;
356           } else {
357             value = v + j*(stepval+bs)*bs + i*bs;
358           }
359           for ( ii=0; ii<bs; ii++,value+=stepval ) {
360             for (jj=0; jj<bs; jj++ ) {
361               *barray++  = *value++;
362             }
363           }
364           barray -=bs2;
365         }
366 
367         if (in[j] >= cstart && in[j] < cend){
368           col  = in[j] - cstart;
369           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
370         }
371         else if (in[j] < 0) continue;
372 #if defined(PETSC_USE_BOPT_g)
373         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
374 #endif
375         else {
376           if (mat->was_assembled) {
377             if (!baij->colmap) {
378               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
379             }
380 
381 #if defined(PETSC_USE_BOPT_g)
382 #if defined (PETSC_USE_CTABLE)
383             { int data;
384               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
385               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
386             }
387 #else
388             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
389 #endif
390 #endif
391 #if defined (PETSC_USE_CTABLE)
392 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
393             col  = (col - 1)/bs;
394 #else
395             col = (baij->colmap[in[j]] - 1)/bs;
396 #endif
397             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
398               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
399               col =  in[j];
400             }
401           }
402           else col = in[j];
403           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
404         }
405       }
406     } else {
407       if (!baij->donotstash) {
408         if (roworiented) {
409           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
410         } else {
411           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         }
413       }
414     }
415   }
416   PetscFunctionReturn(0);
417 }
418 #define HASH_KEY 0.6180339887
419 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
420 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
421 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
422 #undef __FUNC__
423 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
424 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
425 {
426   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
427   int         ierr,i,j,row,col;
428   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
429   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
430   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
431   double      tmp;
432   Scalar      ** HD = baij->hd,value;
433 #if defined(PETSC_USE_BOPT_g)
434   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
435 #endif
436 
437   PetscFunctionBegin;
438 
439   for ( i=0; i<m; i++ ) {
440 #if defined(PETSC_USE_BOPT_g)
441     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
442     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
443 #endif
444       row = im[i];
445     if (row >= rstart_orig && row < rend_orig) {
446       for ( j=0; j<n; j++ ) {
447         col = in[j];
448         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
449         /* Look up into the Hash Table */
450         key = (row/bs)*Nbs+(col/bs)+1;
451         h1  = HASH(size,key,tmp);
452 
453 
454         idx = h1;
455 #if defined(PETSC_USE_BOPT_g)
456         insert_ct++;
457         total_ct++;
458         if (HT[idx] != key) {
459           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
460           if (idx == size) {
461             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
462             if (idx == h1) {
463               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
464             }
465           }
466         }
467 #else
468         if (HT[idx] != key) {
469           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
470           if (idx == size) {
471             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
472             if (idx == h1) {
473               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
474             }
475           }
476         }
477 #endif
478         /* A HASH table entry is found, so insert the values at the correct address */
479         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
480         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
481       }
482     } else {
483       if (!baij->donotstash) {
484         if (roworiented) {
485           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
486         } else {
487           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
488         }
489       }
490     }
491   }
492 #if defined(PETSC_USE_BOPT_g)
493   baij->ht_total_ct = total_ct;
494   baij->ht_insert_ct = insert_ct;
495 #endif
496   PetscFunctionReturn(0);
497 }
498 
499 #undef __FUNC__
500 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
501 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
502 {
503   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
504   int         ierr,i,j,ii,jj,row,col;
505   int         roworiented = baij->roworiented,rstart=baij->rstart ;
506   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
507   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
508   double      tmp;
509   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
510 #if defined(PETSC_USE_BOPT_g)
511   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
512 #endif
513 
514   PetscFunctionBegin;
515 
516   if (roworiented) {
517     stepval = (n-1)*bs;
518   } else {
519     stepval = (m-1)*bs;
520   }
521   for ( i=0; i<m; i++ ) {
522 #if defined(PETSC_USE_BOPT_g)
523     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
524     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
525 #endif
526     row   = im[i];
527     v_t   = v + i*bs2;
528     if (row >= rstart && row < rend) {
529       for ( j=0; j<n; j++ ) {
530         col = in[j];
531 
532         /* Look up into the Hash Table */
533         key = row*Nbs+col+1;
534         h1  = HASH(size,key,tmp);
535 
536         idx = h1;
537 #if defined(PETSC_USE_BOPT_g)
538         total_ct++;
539         insert_ct++;
540        if (HT[idx] != key) {
541           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
542           if (idx == size) {
543             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
544             if (idx == h1) {
545               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
546             }
547           }
548         }
549 #else
550         if (HT[idx] != key) {
551           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
552           if (idx == size) {
553             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
554             if (idx == h1) {
555               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
556             }
557           }
558         }
559 #endif
560         baij_a = HD[idx];
561         if (roworiented) {
562           /*value = v + i*(stepval+bs)*bs + j*bs;*/
563           /* value = v + (i*(stepval+bs)+j)*bs; */
564           value = v_t;
565           v_t  += bs;
566           if (addv == ADD_VALUES) {
567             for ( ii=0; ii<bs; ii++,value+=stepval) {
568               for ( jj=ii; jj<bs2; jj+=bs ) {
569                 baij_a[jj]  += *value++;
570               }
571             }
572           } else {
573             for ( ii=0; ii<bs; ii++,value+=stepval) {
574               for ( jj=ii; jj<bs2; jj+=bs ) {
575                 baij_a[jj]  = *value++;
576               }
577             }
578           }
579         } else {
580           value = v + j*(stepval+bs)*bs + i*bs;
581           if (addv == ADD_VALUES) {
582             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
583               for ( jj=0; jj<bs; jj++ ) {
584                 baij_a[jj]  += *value++;
585               }
586             }
587           } else {
588             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
589               for ( jj=0; jj<bs; jj++ ) {
590                 baij_a[jj]  = *value++;
591               }
592             }
593           }
594         }
595       }
596     } else {
597       if (!baij->donotstash) {
598         if (roworiented) {
599           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
600         } else {
601           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
602         }
603       }
604     }
605   }
606 #if defined(PETSC_USE_BOPT_g)
607   baij->ht_total_ct = total_ct;
608   baij->ht_insert_ct = insert_ct;
609 #endif
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNC__
614 #define __FUNC__ "MatGetValues_MPIBAIJ"
615 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
616 {
617   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
618   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
619   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
620 
621   PetscFunctionBegin;
622   for ( i=0; i<m; i++ ) {
623     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
624     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
625     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
626       row = idxm[i] - bsrstart;
627       for ( j=0; j<n; j++ ) {
628         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
629         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
630         if (idxn[j] >= bscstart && idxn[j] < bscend){
631           col = idxn[j] - bscstart;
632           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633         } else {
634           if (!baij->colmap) {
635             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
636           }
637 #if defined (PETSC_USE_CTABLE)
638           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
639           data --;
640 #else
641           data = baij->colmap[idxn[j]/bs]-1;
642 #endif
643           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
644           else {
645             col  = data + idxn[j]%bs;
646             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
647           }
648         }
649       }
650     } else {
651       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
652     }
653   }
654  PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNC__
658 #define __FUNC__ "MatNorm_MPIBAIJ"
659 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
660 {
661   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
662   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
663   int        ierr, i,bs2=baij->bs2;
664   double     sum = 0.0;
665   Scalar     *v;
666 
667   PetscFunctionBegin;
668   if (baij->size == 1) {
669     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
670   } else {
671     if (type == NORM_FROBENIUS) {
672       v = amat->a;
673       for (i=0; i<amat->nz*bs2; i++ ) {
674 #if defined(PETSC_USE_COMPLEX)
675         sum += PetscReal(PetscConj(*v)*(*v)); v++;
676 #else
677         sum += (*v)*(*v); v++;
678 #endif
679       }
680       v = bmat->a;
681       for (i=0; i<bmat->nz*bs2; i++ ) {
682 #if defined(PETSC_USE_COMPLEX)
683         sum += PetscReal(PetscConj(*v)*(*v)); v++;
684 #else
685         sum += (*v)*(*v); v++;
686 #endif
687       }
688       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
689       *norm = sqrt(*norm);
690     } else {
691       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
692     }
693   }
694   PetscFunctionReturn(0);
695 }
696 
697 
698 /*
699   Creates the hash table, and sets the table
700   This table is created only once.
701   If new entried need to be added to the matrix
702   then the hash table has to be destroyed and
703   recreated.
704 */
705 #undef __FUNC__
706 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
707 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
708 {
709   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
710   Mat         A = baij->A, B=baij->B;
711   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
712   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
713   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
714   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
715   int         *HT,key;
716   Scalar      **HD;
717   double      tmp;
718 #if defined(PETSC_USE_BOPT_g)
719   int         ct=0,max=0;
720 #endif
721 
722   PetscFunctionBegin;
723   baij->ht_size=(int)(factor*nz);
724   size = baij->ht_size;
725 
726   if (baij->ht) {
727     PetscFunctionReturn(0);
728   }
729 
730   /* Allocate Memory for Hash Table */
731   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
732   baij->ht = (int*)(baij->hd + size);
733   HD = baij->hd;
734   HT = baij->ht;
735 
736 
737   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
738 
739 
740   /* Loop Over A */
741   for ( i=0; i<a->mbs; i++ ) {
742     for ( j=ai[i]; j<ai[i+1]; j++ ) {
743       row = i+rstart;
744       col = aj[j]+cstart;
745 
746       key = row*Nbs + col + 1;
747       h1  = HASH(size,key,tmp);
748       for ( k=0; k<size; k++ ){
749         if (HT[(h1+k)%size] == 0.0) {
750           HT[(h1+k)%size] = key;
751           HD[(h1+k)%size] = a->a + j*bs2;
752           break;
753 #if defined(PETSC_USE_BOPT_g)
754         } else {
755           ct++;
756 #endif
757         }
758       }
759 #if defined(PETSC_USE_BOPT_g)
760       if (k> max) max = k;
761 #endif
762     }
763   }
764   /* Loop Over B */
765   for ( i=0; i<b->mbs; i++ ) {
766     for ( j=bi[i]; j<bi[i+1]; j++ ) {
767       row = i+rstart;
768       col = garray[bj[j]];
769       key = row*Nbs + col + 1;
770       h1  = HASH(size,key,tmp);
771       for ( k=0; k<size; k++ ){
772         if (HT[(h1+k)%size] == 0.0) {
773           HT[(h1+k)%size] = key;
774           HD[(h1+k)%size] = b->a + j*bs2;
775           break;
776 #if defined(PETSC_USE_BOPT_g)
777         } else {
778           ct++;
779 #endif
780         }
781       }
782 #if defined(PETSC_USE_BOPT_g)
783       if (k> max) max = k;
784 #endif
785     }
786   }
787 
788   /* Print Summary */
789 #if defined(PETSC_USE_BOPT_g)
790   for ( i=0,j=0; i<size; i++)
791     if (HT[i]) {j++;}
792   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
793            (j== 0)? 0.0:((double)(ct+j))/j,max);
794 #endif
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNC__
799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
801 {
802   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
803   int         ierr,nstash,reallocs;
804   InsertMode  addv;
805 
806   PetscFunctionBegin;
807   if (baij->donotstash) {
808     PetscFunctionReturn(0);
809   }
810 
811   /* make sure all processors are either in INSERTMODE or ADDMODE */
812   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
813   if (addv == (ADD_VALUES|INSERT_VALUES)) {
814     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
815   }
816   mat->insertmode = addv; /* in case this processor had no cache */
817 
818   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
819   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
823   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNC__
828 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
829 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
830 {
831   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
832   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
833   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
834   int         *row,*col,other_disassembled,r1,r2,r3;
835   Scalar      *val;
836   InsertMode  addv = mat->insertmode;
837 
838   PetscFunctionBegin;
839   if (!baij->donotstash) {
840     while (1) {
841       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
842       if (!flg) break;
843 
844       for ( i=0; i<n; ) {
845         /* Now identify the consecutive vals belonging to the same row */
846         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
847         if (j < n) ncols = j-i;
848         else       ncols = n-i;
849         /* Now assemble all these values with a single function call */
850         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
851         i = j;
852       }
853     }
854     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
855     /* Now process the block-stash. Since the values are stashed column-oriented,
856        set the roworiented flag to column oriented, and after MatSetValues()
857        restore the original flags */
858     r1 = baij->roworiented;
859     r2 = a->roworiented;
860     r3 = b->roworiented;
861     baij->roworiented = 0;
862     a->roworiented    = 0;
863     b->roworiented    = 0;
864     while (1) {
865       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
866       if (!flg) break;
867 
868       for ( i=0; i<n; ) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
871         if (j < n) ncols = j-i;
872         else       ncols = n-i;
873         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
874         i = j;
875       }
876     }
877     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
878     baij->roworiented = r1;
879     a->roworiented    = r2;
880     b->roworiented    = r3;
881   }
882 
883   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
884   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
885 
886   /* determine if any processor has disassembled, if so we must
887      also disassemble ourselfs, in order that we may reassemble. */
888   /*
889      if nonzero structure of submatrix B cannot change then we know that
890      no processor disassembled thus we can skip this stuff
891   */
892   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
893     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
894     if (mat->was_assembled && !other_disassembled) {
895       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
896     }
897   }
898 
899   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
900     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
901   }
902   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
903   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
904 
905 #if defined(PETSC_USE_BOPT_g)
906   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
907     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
908              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
909     baij->ht_total_ct  = 0;
910     baij->ht_insert_ct = 0;
911   }
912 #endif
913   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
914     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
915     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
916     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
917   }
918 
919   if (baij->rowvalues) {
920     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
921     baij->rowvalues = 0;
922   }
923   PetscFunctionReturn(0);
924 }
925 
926 /*
927 #undef __FUNC__
928 #define __FUNC__ "MatView_MPIBAIJ_Binary"
929 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
930 {
931   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
932   int          ierr;
933 
934   PetscFunctionBegin;
935   if (baij->size == 1) {
936     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
937   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
938   PetscFunctionReturn(0);
939 }
940 */
941 
942 #undef __FUNC__
943 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
944 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
945 {
946   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
947   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
948   FILE         *fd;
949   int          isascii,isdraw;
950 
951   PetscFunctionBegin;
952   isascii = PetscTypeCompare(viewer,ASCII_VIEWER);
953   isdraw  = PetscTypeCompare(viewer,DRAW_VIEWER);
954   if (isascii) {
955     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
956     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
957       MatInfo info;
958       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
959       ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
960       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
961       ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
962       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
963               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
964               baij->bs,(int)info.memory);
965       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
966       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
967       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
968       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
969       fflush(fd);
970       ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
971       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
972       PetscFunctionReturn(0);
973     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
974       ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
975       PetscFunctionReturn(0);
976     }
977   }
978 
979   if (isdraw) {
980     Draw       draw;
981     PetscTruth isnull;
982     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
983     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
984   }
985 
986   if (size == 1) {
987     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
988   } else {
989     /* assemble the entire matrix onto first processor. */
990     Mat         A;
991     Mat_SeqBAIJ *Aloc;
992     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
993     int         mbs = baij->mbs;
994     Scalar      *a;
995 
996     if (!rank) {
997       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
998     } else {
999       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1000     }
1001     PLogObjectParent(mat,A);
1002 
1003     /* copy over the A part */
1004     Aloc = (Mat_SeqBAIJ*) baij->A->data;
1005     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1006     rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1007 
1008     for ( i=0; i<mbs; i++ ) {
1009       rvals[0] = bs*(baij->rstart + i);
1010       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1011       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1012         col = (baij->cstart+aj[j])*bs;
1013         for (k=0; k<bs; k++ ) {
1014           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1015           col++; a += bs;
1016         }
1017       }
1018     }
1019     /* copy over the B part */
1020     Aloc = (Mat_SeqBAIJ*) baij->B->data;
1021     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1022     for ( i=0; i<mbs; i++ ) {
1023       rvals[0] = bs*(baij->rstart + i);
1024       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1025       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1026         col = baij->garray[aj[j]]*bs;
1027         for (k=0; k<bs; k++ ) {
1028           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1029           col++; a += bs;
1030         }
1031       }
1032     }
1033     ierr = PetscFree(rvals);CHKERRQ(ierr);
1034     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1035     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1036     /*
1037        Everyone has to call to draw the matrix since the graphics waits are
1038        synchronized across all processors that share the Draw object
1039     */
1040     if (!rank || PetscTypeCompare(viewer,DRAW_VIEWER)) {
1041       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
1042     }
1043     ierr = MatDestroy(A);CHKERRQ(ierr);
1044   }
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 
1049 
1050 #undef __FUNC__
1051 #define __FUNC__ "MatView_MPIBAIJ"
1052 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1053 {
1054   int         ierr;
1055   int         isascii,isdraw,issocket,isbinary;
1056 
1057   PetscFunctionBegin;
1058   isascii  = PetscTypeCompare(viewer,ASCII_VIEWER);
1059   isdraw   = PetscTypeCompare(viewer,DRAW_VIEWER);
1060   issocket = PetscTypeCompare(viewer,SOCKET_VIEWER);
1061   isbinary = PetscTypeCompare(viewer,BINARY_VIEWER);
1062   if (isascii || isdraw || issocket || isbinary) {
1063     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1064   } else {
1065     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNC__
1071 #define __FUNC__ "MatDestroy_MPIBAIJ"
1072 int MatDestroy_MPIBAIJ(Mat mat)
1073 {
1074   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1075   int         ierr;
1076 
1077   PetscFunctionBegin;
1078 
1079   if (mat->mapping) {
1080     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1081   }
1082   if (mat->bmapping) {
1083     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1084   }
1085   if (mat->rmap) {
1086     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1087   }
1088   if (mat->cmap) {
1089     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1090   }
1091 #if defined(PETSC_USE_LOG)
1092   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1093 #endif
1094 
1095   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1096   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1097 
1098   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1099   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1100   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1101 #if defined (PETSC_USE_CTABLE)
1102   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1103 #else
1104   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1105 #endif
1106   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1107   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1108   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1109   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1110   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1111   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1112   ierr = PetscFree(baij);CHKERRQ(ierr);
1113   PLogObjectDestroy(mat);
1114   PetscHeaderDestroy(mat);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNC__
1119 #define __FUNC__ "MatMult_MPIBAIJ"
1120 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1121 {
1122   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1123   int         ierr, nt;
1124 
1125   PetscFunctionBegin;
1126   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1127   if (nt != a->n) {
1128     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1129   }
1130   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1131   if (nt != a->m) {
1132     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1133   }
1134   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1135   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1136   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1137   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1138   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNC__
1143 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1144 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1145 {
1146   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1147   int        ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1151   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1152   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1153   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNC__
1158 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1159 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1160 {
1161   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1162   int         ierr;
1163 
1164   PetscFunctionBegin;
1165   /* do nondiagonal part */
1166   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1167   /* send it on its way */
1168   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1169   /* do local part */
1170   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1171   /* receive remote parts: note this assumes the values are not actually */
1172   /* inserted in yy until the next line, which is true for my implementation*/
1173   /* but is not perhaps always true. */
1174   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNC__
1179 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1180 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1181 {
1182   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1183   int         ierr;
1184 
1185   PetscFunctionBegin;
1186   /* do nondiagonal part */
1187   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1188   /* send it on its way */
1189   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1190   /* do local part */
1191   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1192   /* receive remote parts: note this assumes the values are not actually */
1193   /* inserted in yy until the next line, which is true for my implementation*/
1194   /* but is not perhaps always true. */
1195   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 /*
1200   This only works correctly for square matrices where the subblock A->A is the
1201    diagonal block
1202 */
1203 #undef __FUNC__
1204 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1205 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1206 {
1207   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1208   int         ierr;
1209 
1210   PetscFunctionBegin;
1211   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1212   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 #undef __FUNC__
1217 #define __FUNC__ "MatScale_MPIBAIJ"
1218 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1219 {
1220   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1221   int         ierr;
1222 
1223   PetscFunctionBegin;
1224   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1225   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1226   PetscFunctionReturn(0);
1227 }
1228 
1229 #undef __FUNC__
1230 #define __FUNC__ "MatGetSize_MPIBAIJ"
1231 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1232 {
1233   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1234 
1235   PetscFunctionBegin;
1236   if (m) *m = mat->M;
1237   if (n) *n = mat->N;
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNC__
1242 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1243 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1244 {
1245   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1246 
1247   PetscFunctionBegin;
1248   *m = mat->m; *n = mat->n;
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNC__
1253 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1254 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1255 {
1256   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1257 
1258   PetscFunctionBegin;
1259   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNC__
1264 #define __FUNC__ "MatGetRow_MPIBAIJ"
1265 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1266 {
1267   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1268   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1269   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1270   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1271   int        *cmap, *idx_p,cstart = mat->cstart;
1272 
1273   PetscFunctionBegin;
1274   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1275   mat->getrowactive = PETSC_TRUE;
1276 
1277   if (!mat->rowvalues && (idx || v)) {
1278     /*
1279         allocate enough space to hold information from the longest row.
1280     */
1281     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1282     int     max = 1,mbs = mat->mbs,tmp;
1283     for ( i=0; i<mbs; i++ ) {
1284       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1285       if (max < tmp) { max = tmp; }
1286     }
1287     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1288     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1289   }
1290 
1291   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1292   lrow = row - brstart;
1293 
1294   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1295   if (!v)   {pvA = 0; pvB = 0;}
1296   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1297   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1298   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1299   nztot = nzA + nzB;
1300 
1301   cmap  = mat->garray;
1302   if (v  || idx) {
1303     if (nztot) {
1304       /* Sort by increasing column numbers, assuming A and B already sorted */
1305       int imark = -1;
1306       if (v) {
1307         *v = v_p = mat->rowvalues;
1308         for ( i=0; i<nzB; i++ ) {
1309           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1310           else break;
1311         }
1312         imark = i;
1313         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1314         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1315       }
1316       if (idx) {
1317         *idx = idx_p = mat->rowindices;
1318         if (imark > -1) {
1319           for ( i=0; i<imark; i++ ) {
1320             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1321           }
1322         } else {
1323           for ( i=0; i<nzB; i++ ) {
1324             if (cmap[cworkB[i]/bs] < cstart)
1325               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1326             else break;
1327           }
1328           imark = i;
1329         }
1330         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1331         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1332       }
1333     } else {
1334       if (idx) *idx = 0;
1335       if (v)   *v   = 0;
1336     }
1337   }
1338   *nz = nztot;
1339   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1340   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNC__
1345 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1346 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1347 {
1348   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1349 
1350   PetscFunctionBegin;
1351   if (baij->getrowactive == PETSC_FALSE) {
1352     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1353   }
1354   baij->getrowactive = PETSC_FALSE;
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNC__
1359 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1360 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1361 {
1362   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1363 
1364   PetscFunctionBegin;
1365   *bs = baij->bs;
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNC__
1370 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1371 int MatZeroEntries_MPIBAIJ(Mat A)
1372 {
1373   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1374   int         ierr;
1375 
1376   PetscFunctionBegin;
1377   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1378   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 #undef __FUNC__
1383 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1384 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1385 {
1386   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1387   Mat         A = a->A, B = a->B;
1388   int         ierr;
1389   double      isend[5], irecv[5];
1390 
1391   PetscFunctionBegin;
1392   info->block_size     = (double)a->bs;
1393   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1394   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1395   isend[3] = info->memory;  isend[4] = info->mallocs;
1396   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1397   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1398   isend[3] += info->memory;  isend[4] += info->mallocs;
1399   if (flag == MAT_LOCAL) {
1400     info->nz_used      = isend[0];
1401     info->nz_allocated = isend[1];
1402     info->nz_unneeded  = isend[2];
1403     info->memory       = isend[3];
1404     info->mallocs      = isend[4];
1405   } else if (flag == MAT_GLOBAL_MAX) {
1406     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1407     info->nz_used      = irecv[0];
1408     info->nz_allocated = irecv[1];
1409     info->nz_unneeded  = irecv[2];
1410     info->memory       = irecv[3];
1411     info->mallocs      = irecv[4];
1412   } else if (flag == MAT_GLOBAL_SUM) {
1413     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1414     info->nz_used      = irecv[0];
1415     info->nz_allocated = irecv[1];
1416     info->nz_unneeded  = irecv[2];
1417     info->memory       = irecv[3];
1418     info->mallocs      = irecv[4];
1419   } else {
1420     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1421   }
1422   info->rows_global       = (double)a->M;
1423   info->columns_global    = (double)a->N;
1424   info->rows_local        = (double)a->m;
1425   info->columns_local     = (double)a->N;
1426   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1427   info->fill_ratio_needed = 0;
1428   info->factor_mallocs    = 0;
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNC__
1433 #define __FUNC__ "MatSetOption_MPIBAIJ"
1434 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1435 {
1436   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1437   int         ierr;
1438 
1439   PetscFunctionBegin;
1440   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1441       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1442       op == MAT_COLUMNS_UNSORTED ||
1443       op == MAT_COLUMNS_SORTED ||
1444       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1445       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1446         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1447         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1448   } else if (op == MAT_ROW_ORIENTED) {
1449         a->roworiented = 1;
1450         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1451         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1452   } else if (op == MAT_ROWS_SORTED ||
1453              op == MAT_ROWS_UNSORTED ||
1454              op == MAT_SYMMETRIC ||
1455              op == MAT_STRUCTURALLY_SYMMETRIC ||
1456              op == MAT_YES_NEW_DIAGONALS ||
1457              op == MAT_USE_HASH_TABLE) {
1458     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1459   } else if (op == MAT_COLUMN_ORIENTED) {
1460     a->roworiented = 0;
1461     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1462     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1463   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1464     a->donotstash = 1;
1465   } else if (op == MAT_NO_NEW_DIAGONALS) {
1466     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1467   } else if (op == MAT_USE_HASH_TABLE) {
1468     a->ht_flag = 1;
1469   } else {
1470     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNC__
1476 #define __FUNC__ "MatTranspose_MPIBAIJ("
1477 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1478 {
1479   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1480   Mat_SeqBAIJ *Aloc;
1481   Mat        B;
1482   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1483   int        bs=baij->bs,mbs=baij->mbs;
1484   Scalar     *a;
1485 
1486   PetscFunctionBegin;
1487   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1488   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1489 CHKERRQ(ierr);
1490 
1491   /* copy over the A part */
1492   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1493   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1494   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1495 
1496   for ( i=0; i<mbs; i++ ) {
1497     rvals[0] = bs*(baij->rstart + i);
1498     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1499     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1500       col = (baij->cstart+aj[j])*bs;
1501       for (k=0; k<bs; k++ ) {
1502         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1503         col++; a += bs;
1504       }
1505     }
1506   }
1507   /* copy over the B part */
1508   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1509   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1510   for ( i=0; i<mbs; i++ ) {
1511     rvals[0] = bs*(baij->rstart + i);
1512     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1513     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1514       col = baij->garray[aj[j]]*bs;
1515       for (k=0; k<bs; k++ ) {
1516         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1517         col++; a += bs;
1518       }
1519     }
1520   }
1521   ierr = PetscFree(rvals);CHKERRQ(ierr);
1522   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1523   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1524 
1525   if (matout != PETSC_NULL) {
1526     *matout = B;
1527   } else {
1528     PetscOps *Abops;
1529     MatOps   Aops;
1530 
1531     /* This isn't really an in-place transpose .... but free data structures from baij */
1532     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
1533     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1534     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1535 #if defined (PETSC_USE_CTABLE)
1536     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1537 #else
1538     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1539 #endif
1540     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1541     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1542     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1543     ierr = PetscFree(baij);CHKERRQ(ierr);
1544 
1545     /*
1546        This is horrible, horrible code. We need to keep the
1547       A pointers for the bops and ops but copy everything
1548       else from C.
1549     */
1550     Abops   = A->bops;
1551     Aops    = A->ops;
1552     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1553     A->bops = Abops;
1554     A->ops  = Aops;
1555 
1556     PetscHeaderDestroy(B);
1557   }
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNC__
1562 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1563 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1564 {
1565   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1566   Mat         a = baij->A, b = baij->B;
1567   int         ierr,s1,s2,s3;
1568 
1569   PetscFunctionBegin;
1570   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1571   if (rr) {
1572     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1573     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1574     /* Overlap communication with computation. */
1575     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1576   }
1577   if (ll) {
1578     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1579     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1580     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1581   }
1582   /* scale  the diagonal block */
1583   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1584 
1585   if (rr) {
1586     /* Do a scatter end and then right scale the off-diagonal block */
1587     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1588     ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
1589   }
1590 
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 #undef __FUNC__
1595 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1596 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1597 {
1598   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1599   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1600   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1601   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1602   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1603   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1604   MPI_Comm       comm = A->comm;
1605   MPI_Request    *send_waits,*recv_waits;
1606   MPI_Status     recv_status,*send_status;
1607   IS             istmp;
1608 
1609   PetscFunctionBegin;
1610   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1611   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1612 
1613   /*  first count number of contributors to each processor */
1614   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1615   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1616  procs   = nprocs + size;
1617   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1618   for ( i=0; i<N; i++ ) {
1619     idx = rows[i];
1620     found = 0;
1621     for ( j=0; j<size; j++ ) {
1622       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1623         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1624       }
1625     }
1626     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1627   }
1628   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1629 
1630   /* inform other processors of number of messages and max length*/
1631   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
1632   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1633   nrecvs = work[rank];
1634   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1635   nmax   = work[rank];
1636   ierr = PetscFree(work);CHKERRQ(ierr);
1637 
1638   /* post receives:   */
1639   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1640   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1641   for ( i=0; i<nrecvs; i++ ) {
1642     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1643   }
1644 
1645   /* do sends:
1646      1) starts[i] gives the starting index in svalues for stuff going to
1647      the ith processor
1648   */
1649   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1650   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1651   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
1652   starts[0] = 0;
1653   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1654   for ( i=0; i<N; i++ ) {
1655     svalues[starts[owner[i]]++] = rows[i];
1656   }
1657   ISRestoreIndices(is,&rows);
1658 
1659   starts[0] = 0;
1660   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1661   count = 0;
1662   for ( i=0; i<size; i++ ) {
1663     if (procs[i]) {
1664       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1665     }
1666   }
1667   ierr = PetscFree(starts);CHKERRQ(ierr);
1668 
1669   base = owners[rank]*bs;
1670 
1671   /*  wait on receives */
1672   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
1673   source = lens + nrecvs;
1674   count  = nrecvs; slen = 0;
1675   while (count) {
1676     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1677     /* unpack receives into our local space */
1678     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1679     source[imdex]  = recv_status.MPI_SOURCE;
1680     lens[imdex]  = n;
1681     slen += n;
1682     count--;
1683   }
1684   ierr = PetscFree(recv_waits); CHKERRQ(ierr);
1685 
1686   /* move the data into the send scatter */
1687   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
1688   count = 0;
1689   for ( i=0; i<nrecvs; i++ ) {
1690     values = rvalues + i*nmax;
1691     for ( j=0; j<lens[i]; j++ ) {
1692       lrows[count++] = values[j] - base;
1693     }
1694   }
1695   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1696   ierr = PetscFree(lens);CHKERRQ(ierr);
1697   ierr = PetscFree(owner);CHKERRQ(ierr);
1698   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1699 
1700   /* actually zap the local rows */
1701   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1702   PLogObjectParent(A,istmp);
1703 
1704   /*
1705         Zero the required rows. If the "diagonal block" of the matrix
1706      is square and the user wishes to set the diagonal we use seperate
1707      code so that MatSetValues() is not called for each diagonal allocating
1708      new memory, thus calling lots of mallocs and slowing things down.
1709 
1710        Contributed by: Mathew Knepley
1711   */
1712   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1713   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
1714   if (diag && (l->A->M == l->A->N)) {
1715     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
1716   } else if (diag) {
1717     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1718     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1719       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1720 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1721     }
1722     for ( i = 0; i < slen; i++ ) {
1723       row  = lrows[i] + rstart_bs;
1724       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1725     }
1726     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1727     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1728   } else {
1729     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1730   }
1731 
1732   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1733   ierr = PetscFree(lrows);CHKERRQ(ierr);
1734 
1735   /* wait on sends */
1736   if (nsends) {
1737     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1738     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1739     ierr = PetscFree(send_status);CHKERRQ(ierr);
1740   }
1741   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1742   ierr = PetscFree(svalues);CHKERRQ(ierr);
1743 
1744   PetscFunctionReturn(0);
1745 }
1746 
1747 #undef __FUNC__
1748 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1749 int MatPrintHelp_MPIBAIJ(Mat A)
1750 {
1751   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1752   MPI_Comm    comm = A->comm;
1753   static int  called = 0;
1754   int         ierr;
1755 
1756   PetscFunctionBegin;
1757   if (!a->rank) {
1758     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1759   }
1760   if (called) {PetscFunctionReturn(0);} else called = 1;
1761   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1762   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 #undef __FUNC__
1767 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1768 int MatSetUnfactored_MPIBAIJ(Mat A)
1769 {
1770   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1771   int         ierr;
1772 
1773   PetscFunctionBegin;
1774   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1779 
1780 #undef __FUNC__
1781 #define __FUNC__ "MatEqual_MPIBAIJ"
1782 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1783 {
1784   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1785   Mat         a, b, c, d;
1786   PetscTruth  flg;
1787   int         ierr;
1788 
1789   PetscFunctionBegin;
1790   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1791   a = matA->A; b = matA->B;
1792   c = matB->A; d = matB->B;
1793 
1794   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1795   if (flg == PETSC_TRUE) {
1796     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1797   }
1798   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 /* -------------------------------------------------------------------*/
1803 static struct _MatOps MatOps_Values = {
1804   MatSetValues_MPIBAIJ,
1805   MatGetRow_MPIBAIJ,
1806   MatRestoreRow_MPIBAIJ,
1807   MatMult_MPIBAIJ,
1808   MatMultAdd_MPIBAIJ,
1809   MatMultTrans_MPIBAIJ,
1810   MatMultTransAdd_MPIBAIJ,
1811   0,
1812   0,
1813   0,
1814   0,
1815   0,
1816   0,
1817   0,
1818   MatTranspose_MPIBAIJ,
1819   MatGetInfo_MPIBAIJ,
1820   MatEqual_MPIBAIJ,
1821   MatGetDiagonal_MPIBAIJ,
1822   MatDiagonalScale_MPIBAIJ,
1823   MatNorm_MPIBAIJ,
1824   MatAssemblyBegin_MPIBAIJ,
1825   MatAssemblyEnd_MPIBAIJ,
1826   0,
1827   MatSetOption_MPIBAIJ,
1828   MatZeroEntries_MPIBAIJ,
1829   MatZeroRows_MPIBAIJ,
1830   0,
1831   0,
1832   0,
1833   0,
1834   MatGetSize_MPIBAIJ,
1835   MatGetLocalSize_MPIBAIJ,
1836   MatGetOwnershipRange_MPIBAIJ,
1837   0,
1838   0,
1839   0,
1840   0,
1841   MatDuplicate_MPIBAIJ,
1842   0,
1843   0,
1844   0,
1845   0,
1846   0,
1847   MatGetSubMatrices_MPIBAIJ,
1848   MatIncreaseOverlap_MPIBAIJ,
1849   MatGetValues_MPIBAIJ,
1850   0,
1851   MatPrintHelp_MPIBAIJ,
1852   MatScale_MPIBAIJ,
1853   0,
1854   0,
1855   0,
1856   MatGetBlockSize_MPIBAIJ,
1857   0,
1858   0,
1859   0,
1860   0,
1861   0,
1862   0,
1863   MatSetUnfactored_MPIBAIJ,
1864   0,
1865   MatSetValuesBlocked_MPIBAIJ,
1866   0,
1867   0,
1868   0,
1869   MatGetMaps_Petsc};
1870 
1871 
1872 EXTERN_C_BEGIN
1873 #undef __FUNC__
1874 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1875 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1876 {
1877   PetscFunctionBegin;
1878   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1879   *iscopy = PETSC_FALSE;
1880   PetscFunctionReturn(0);
1881 }
1882 EXTERN_C_END
1883 
1884 #undef __FUNC__
1885 #define __FUNC__ "MatCreateMPIBAIJ"
1886 /*@C
1887    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1888    (block compressed row).  For good matrix assembly performance
1889    the user should preallocate the matrix storage by setting the parameters
1890    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1891    performance can be increased by more than a factor of 50.
1892 
1893    Collective on MPI_Comm
1894 
1895    Input Parameters:
1896 +  comm - MPI communicator
1897 .  bs   - size of blockk
1898 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1899            This value should be the same as the local size used in creating the
1900            y vector for the matrix-vector product y = Ax.
1901 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1902            This value should be the same as the local size used in creating the
1903            x vector for the matrix-vector product y = Ax.
1904 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1905 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1906 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1907            submatrix  (same for all local rows)
1908 .  d_nnz - array containing the number of block nonzeros in the various block rows
1909            of the in diagonal portion of the local (possibly different for each block
1910            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1911 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1912            submatrix (same for all local rows).
1913 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1914            off-diagonal portion of the local submatrix (possibly different for
1915            each block row) or PETSC_NULL.
1916 
1917    Output Parameter:
1918 .  A - the matrix
1919 
1920    Options Database Keys:
1921 .   -mat_no_unroll - uses code that does not unroll the loops in the
1922                      block calculations (much slower)
1923 .   -mat_block_size - size of the blocks to use
1924 .   -mat_mpi - use the parallel matrix data structures even on one processor
1925                (defaults to using SeqBAIJ format on one processor)
1926 
1927    Notes:
1928    The user MUST specify either the local or global matrix dimensions
1929    (possibly both).
1930 
1931    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1932    than it must be used on all processors that share the object for that argument.
1933 
1934    Storage Information:
1935    For a square global matrix we define each processor's diagonal portion
1936    to be its local rows and the corresponding columns (a square submatrix);
1937    each processor's off-diagonal portion encompasses the remainder of the
1938    local matrix (a rectangular submatrix).
1939 
1940    The user can specify preallocated storage for the diagonal part of
1941    the local submatrix with either d_nz or d_nnz (not both).  Set
1942    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1943    memory allocation.  Likewise, specify preallocated storage for the
1944    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1945 
1946    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1947    the figure below we depict these three local rows and all columns (0-11).
1948 
1949 .vb
1950            0 1 2 3 4 5 6 7 8 9 10 11
1951           -------------------
1952    row 3  |  o o o d d d o o o o o o
1953    row 4  |  o o o d d d o o o o o o
1954    row 5  |  o o o d d d o o o o o o
1955           -------------------
1956 .ve
1957 
1958    Thus, any entries in the d locations are stored in the d (diagonal)
1959    submatrix, and any entries in the o locations are stored in the
1960    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1961    stored simply in the MATSEQBAIJ format for compressed row storage.
1962 
1963    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1964    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1965    In general, for PDE problems in which most nonzeros are near the diagonal,
1966    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1967    or you will get TERRIBLE performance; see the users' manual chapter on
1968    matrices.
1969 
1970    Level: intermediate
1971 
1972 .keywords: matrix, block, aij, compressed row, sparse, parallel
1973 
1974 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1975 @*/
1976 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1977                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1978 {
1979   Mat          B;
1980   Mat_MPIBAIJ  *b;
1981   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1982   int          flag1 = 0,flag2 = 0;
1983 
1984   PetscFunctionBegin;
1985   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1986 
1987   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1988   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
1989   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
1990   if (d_nnz) {
1991     for (i=0; i<m/bs; i++) {
1992       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1993     }
1994   }
1995   if (o_nnz) {
1996     for (i=0; i<m/bs; i++) {
1997       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1998     }
1999   }
2000 
2001   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2002   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2003   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2004   if (!flag1 && !flag2 && size == 1) {
2005     if (M == PETSC_DECIDE) M = m;
2006     if (N == PETSC_DECIDE) N = n;
2007     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
2008     PetscFunctionReturn(0);
2009   }
2010 
2011   *A = 0;
2012   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
2013   PLogObjectCreate(B);
2014   B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2015   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2016   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2017 
2018   B->ops->destroy    = MatDestroy_MPIBAIJ;
2019   B->ops->view       = MatView_MPIBAIJ;
2020   B->mapping    = 0;
2021   B->factor     = 0;
2022   B->assembled  = PETSC_FALSE;
2023 
2024   B->insertmode = NOT_SET_VALUES;
2025   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2026   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2027 
2028   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
2029     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2030   }
2031   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
2032     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2033   }
2034   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2035     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2036   }
2037   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2038   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2039 
2040   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2041     work[0] = m; work[1] = n;
2042     mbs = m/bs; nbs = n/bs;
2043     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
2044     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2045     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2046   }
2047   if (m == PETSC_DECIDE) {
2048     Mbs = M/bs;
2049     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2050     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2051     m   = mbs*bs;
2052   }
2053   if (n == PETSC_DECIDE) {
2054     Nbs = N/bs;
2055     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
2056     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2057     n   = nbs*bs;
2058   }
2059   if (mbs*bs != m || nbs*bs != n) {
2060     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2061   }
2062 
2063   b->m = m; B->m = m;
2064   b->n = n; B->n = n;
2065   b->N = N; B->N = N;
2066   b->M = M; B->M = M;
2067   b->bs  = bs;
2068   b->bs2 = bs*bs;
2069   b->mbs = mbs;
2070   b->nbs = nbs;
2071   b->Mbs = Mbs;
2072   b->Nbs = Nbs;
2073 
2074   /* the information in the maps duplicates the information computed below, eventually
2075      we should remove the duplicate information that is not contained in the maps */
2076   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2077   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2078 
2079   /* build local table of row and column ownerships */
2080   b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2081   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2082   b->cowners    = b->rowners + b->size + 2;
2083   b->rowners_bs = b->cowners + b->size + 2;
2084   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2085   b->rowners[0]    = 0;
2086   for ( i=2; i<=b->size; i++ ) {
2087     b->rowners[i] += b->rowners[i-1];
2088   }
2089   for ( i=0; i<=b->size; i++ ) {
2090     b->rowners_bs[i] = b->rowners[i]*bs;
2091   }
2092   b->rstart    = b->rowners[b->rank];
2093   b->rend      = b->rowners[b->rank+1];
2094   b->rstart_bs = b->rstart * bs;
2095   b->rend_bs   = b->rend * bs;
2096 
2097   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2098   b->cowners[0] = 0;
2099   for ( i=2; i<=b->size; i++ ) {
2100     b->cowners[i] += b->cowners[i-1];
2101   }
2102   b->cstart    = b->cowners[b->rank];
2103   b->cend      = b->cowners[b->rank+1];
2104   b->cstart_bs = b->cstart * bs;
2105   b->cend_bs   = b->cend * bs;
2106 
2107 
2108   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2109   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2110   PLogObjectParent(B,b->A);
2111   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2112   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2113   PLogObjectParent(B,b->B);
2114 
2115   /* build cache for off array entries formed */
2116   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
2117   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
2118   b->donotstash  = 0;
2119   b->colmap      = 0;
2120   b->garray      = 0;
2121   b->roworiented = 1;
2122 
2123   /* stuff used in block assembly */
2124   b->barray       = 0;
2125 
2126   /* stuff used for matrix vector multiply */
2127   b->lvec         = 0;
2128   b->Mvctx        = 0;
2129 
2130   /* stuff for MatGetRow() */
2131   b->rowindices   = 0;
2132   b->rowvalues    = 0;
2133   b->getrowactive = PETSC_FALSE;
2134 
2135   /* hash table stuff */
2136   b->ht           = 0;
2137   b->hd           = 0;
2138   b->ht_size      = 0;
2139   b->ht_flag      = 0;
2140   b->ht_fact      = 0;
2141   b->ht_total_ct  = 0;
2142   b->ht_insert_ct = 0;
2143 
2144   *A = B;
2145   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2146   if (flg) {
2147     double fact = 1.39;
2148     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2149     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
2150     if (fact <= 1.0) fact = 1.39;
2151     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2152     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2153   }
2154   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
2155                                      "MatStoreValues_MPIBAIJ",
2156                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2157   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
2158                                      "MatRetrieveValues_MPIBAIJ",
2159                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2160   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
2161                                      "MatGetDiagonalBlock_MPIBAIJ",
2162                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 #undef __FUNC__
2167 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2168 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2169 {
2170   Mat         mat;
2171   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2172   int         ierr, len=0, flg;
2173 
2174   PetscFunctionBegin;
2175   *newmat       = 0;
2176   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2177   PLogObjectCreate(mat);
2178   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2179   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2180   mat->ops->destroy = MatDestroy_MPIBAIJ;
2181   mat->ops->view    = MatView_MPIBAIJ;
2182   mat->factor       = matin->factor;
2183   mat->assembled    = PETSC_TRUE;
2184   mat->insertmode   = NOT_SET_VALUES;
2185 
2186   a->m = mat->m   = oldmat->m;
2187   a->n = mat->n   = oldmat->n;
2188   a->M = mat->M   = oldmat->M;
2189   a->N = mat->N   = oldmat->N;
2190 
2191   a->bs  = oldmat->bs;
2192   a->bs2 = oldmat->bs2;
2193   a->mbs = oldmat->mbs;
2194   a->nbs = oldmat->nbs;
2195   a->Mbs = oldmat->Mbs;
2196   a->Nbs = oldmat->Nbs;
2197 
2198   a->rstart       = oldmat->rstart;
2199   a->rend         = oldmat->rend;
2200   a->cstart       = oldmat->cstart;
2201   a->cend         = oldmat->cend;
2202   a->size         = oldmat->size;
2203   a->rank         = oldmat->rank;
2204   a->donotstash   = oldmat->donotstash;
2205   a->roworiented  = oldmat->roworiented;
2206   a->rowindices   = 0;
2207   a->rowvalues    = 0;
2208   a->getrowactive = PETSC_FALSE;
2209   a->barray       = 0;
2210   a->rstart_bs    = oldmat->rstart_bs;
2211   a->rend_bs      = oldmat->rend_bs;
2212   a->cstart_bs    = oldmat->cstart_bs;
2213   a->cend_bs      = oldmat->cend_bs;
2214 
2215   /* hash table stuff */
2216   a->ht           = 0;
2217   a->hd           = 0;
2218   a->ht_size      = 0;
2219   a->ht_flag      = oldmat->ht_flag;
2220   a->ht_fact      = oldmat->ht_fact;
2221   a->ht_total_ct  = 0;
2222   a->ht_insert_ct = 0;
2223 
2224 
2225   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2226   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2227   a->cowners    = a->rowners + a->size + 2;
2228   a->rowners_bs = a->cowners + a->size + 2;
2229   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2230   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2231   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2232   if (oldmat->colmap) {
2233 #if defined (PETSC_USE_CTABLE)
2234   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2235 #else
2236     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2237     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2238     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2239 #endif
2240   } else a->colmap = 0;
2241   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2242     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2243     PLogObjectMemory(mat,len*sizeof(int));
2244     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2245   } else a->garray = 0;
2246 
2247   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2248   PLogObjectParent(mat,a->lvec);
2249   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2250 
2251   PLogObjectParent(mat,a->Mvctx);
2252   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2253   PLogObjectParent(mat,a->A);
2254   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2255   PLogObjectParent(mat,a->B);
2256   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2257   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2258   if (flg) {
2259     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2260   }
2261   *newmat = mat;
2262   PetscFunctionReturn(0);
2263 }
2264 
2265 #include "sys.h"
2266 
2267 #undef __FUNC__
2268 #define __FUNC__ "MatLoad_MPIBAIJ"
2269 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2270 {
2271   Mat          A;
2272   int          i, nz, ierr, j,rstart, rend, fd;
2273   Scalar       *vals,*buf;
2274   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2275   MPI_Status   status;
2276   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2277   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2278   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2279   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2280   int          dcount,kmax,k,nzcount,tmp;
2281 
2282   PetscFunctionBegin;
2283   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2284 
2285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2286   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2287   if (!rank) {
2288     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2289     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2290     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2291     if (header[3] < 0) {
2292       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2293     }
2294   }
2295 
2296   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2297   M = header[1]; N = header[2];
2298 
2299   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2300 
2301   /*
2302      This code adds extra rows to make sure the number of rows is
2303      divisible by the blocksize
2304   */
2305   Mbs        = M/bs;
2306   extra_rows = bs - M + bs*(Mbs);
2307   if (extra_rows == bs) extra_rows = 0;
2308   else                  Mbs++;
2309   if (extra_rows &&!rank) {
2310     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2311   }
2312 
2313   /* determine ownership of all rows */
2314   mbs = Mbs/size + ((Mbs % size) > rank);
2315   m   = mbs * bs;
2316   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2317   browners = rowners + size + 1;
2318   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2319   rowners[0] = 0;
2320   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2321   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2322   rstart = rowners[rank];
2323   rend   = rowners[rank+1];
2324 
2325   /* distribute row lengths to all processors */
2326   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2327   if (!rank) {
2328     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2329     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2330     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2331     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2332     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2333     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2334     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2335   } else {
2336     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2337   }
2338 
2339   if (!rank) {
2340     /* calculate the number of nonzeros on each processor */
2341     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2342     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2343     for ( i=0; i<size; i++ ) {
2344       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2345         procsnz[i] += rowlengths[j];
2346       }
2347     }
2348     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2349 
2350     /* determine max buffer needed and allocate it */
2351     maxnz = 0;
2352     for ( i=0; i<size; i++ ) {
2353       maxnz = PetscMax(maxnz,procsnz[i]);
2354     }
2355     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2356 
2357     /* read in my part of the matrix column indices  */
2358     nz = procsnz[0];
2359     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2360     mycols = ibuf;
2361     if (size == 1)  nz -= extra_rows;
2362     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2363     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2364 
2365     /* read in every ones (except the last) and ship off */
2366     for ( i=1; i<size-1; i++ ) {
2367       nz   = procsnz[i];
2368       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2369       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2370     }
2371     /* read in the stuff for the last proc */
2372     if ( size != 1 ) {
2373       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2374       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2375       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2376       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2377     }
2378     ierr = PetscFree(cols);CHKERRQ(ierr);
2379   } else {
2380     /* determine buffer space needed for message */
2381     nz = 0;
2382     for ( i=0; i<m; i++ ) {
2383       nz += locrowlens[i];
2384     }
2385     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2386     mycols = ibuf;
2387     /* receive message of column indices*/
2388     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2389     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2390     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2391   }
2392 
2393   /* loop over local rows, determining number of off diagonal entries */
2394   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2395   odlens = dlens + (rend-rstart);
2396   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2397   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2398   masked1 = mask    + Mbs;
2399   masked2 = masked1 + Mbs;
2400   rowcount = 0; nzcount = 0;
2401   for ( i=0; i<mbs; i++ ) {
2402     dcount  = 0;
2403     odcount = 0;
2404     for ( j=0; j<bs; j++ ) {
2405       kmax = locrowlens[rowcount];
2406       for ( k=0; k<kmax; k++ ) {
2407         tmp = mycols[nzcount++]/bs;
2408         if (!mask[tmp]) {
2409           mask[tmp] = 1;
2410           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2411           else masked1[dcount++] = tmp;
2412         }
2413       }
2414       rowcount++;
2415     }
2416 
2417     dlens[i]  = dcount;
2418     odlens[i] = odcount;
2419 
2420     /* zero out the mask elements we set */
2421     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2422     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2423   }
2424 
2425   /* create our matrix */
2426   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2427   A = *newmat;
2428   MatSetOption(A,MAT_COLUMNS_SORTED);
2429 
2430   if (!rank) {
2431     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2432     /* read in my part of the matrix numerical values  */
2433     nz = procsnz[0];
2434     vals = buf;
2435     mycols = ibuf;
2436     if (size == 1)  nz -= extra_rows;
2437     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2438     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2439 
2440     /* insert into matrix */
2441     jj      = rstart*bs;
2442     for ( i=0; i<m; i++ ) {
2443       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2444       mycols += locrowlens[i];
2445       vals   += locrowlens[i];
2446       jj++;
2447     }
2448     /* read in other processors (except the last one) and ship out */
2449     for ( i=1; i<size-1; i++ ) {
2450       nz   = procsnz[i];
2451       vals = buf;
2452       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2453       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2454     }
2455     /* the last proc */
2456     if ( size != 1 ){
2457       nz   = procsnz[i] - extra_rows;
2458       vals = buf;
2459       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2460       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2461       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2462     }
2463     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2464   } else {
2465     /* receive numeric values */
2466     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2467 
2468     /* receive message of values*/
2469     vals   = buf;
2470     mycols = ibuf;
2471     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2472     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2473     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2474 
2475     /* insert into matrix */
2476     jj      = rstart*bs;
2477     for ( i=0; i<m; i++ ) {
2478       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2479       mycols += locrowlens[i];
2480       vals   += locrowlens[i];
2481       jj++;
2482     }
2483   }
2484   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2485   ierr = PetscFree(buf);CHKERRQ(ierr);
2486   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2487   ierr = PetscFree(rowners);CHKERRQ(ierr);
2488   ierr = PetscFree(dlens);CHKERRQ(ierr);
2489   ierr = PetscFree(mask);CHKERRQ(ierr);
2490   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2491   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 #undef __FUNC__
2496 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2497 /*@
2498    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2499 
2500    Input Parameters:
2501 .  mat  - the matrix
2502 .  fact - factor
2503 
2504    Collective on Mat
2505 
2506    Level: advanced
2507 
2508   Notes:
2509    This can also be set by the command line option: -mat_use_hash_table fact
2510 
2511 .keywords: matrix, hashtable, factor, HT
2512 
2513 .seealso: MatSetOption()
2514 @*/
2515 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2516 {
2517   Mat_MPIBAIJ *baij;
2518 
2519   PetscFunctionBegin;
2520   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2521   if (mat->type != MATMPIBAIJ) {
2522     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2523   }
2524   baij = (Mat_MPIBAIJ*) mat->data;
2525   baij->ht_fact = fact;
2526   PetscFunctionReturn(0);
2527 }
2528