xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a7a1495cff6e7d7f13bcb4a602de4e22c1c115f6)
1 /*$Id: mpibaij.c,v 1.186 2000/01/11 21:00:57 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 extern int MatSetUpMultiply_MPIBAIJ(Mat);
7 extern int DisAssemble_MPIBAIJ(Mat);
8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 extern int MatPrintHelp_SeqBAIJ(Mat);
16 
17 EXTERN_C_BEGIN
18 #undef __FUNC__
19 #define __FUNC__ "MatStoreValues_MPIBAIJ"
20 int MatStoreValues_MPIBAIJ(Mat mat)
21 {
22   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
23   int         ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
27   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 EXTERN_C_END
31 
32 EXTERN_C_BEGIN
33 #undef __FUNC__
34 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
35 int MatRetrieveValues_MPIBAIJ(Mat mat)
36 {
37   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
38   int         ierr;
39 
40   PetscFunctionBegin;
41   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
42   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
43   PetscFunctionReturn(0);
44 }
45 EXTERN_C_END
46 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  /* nbs = number of local off-diagonal block columns; bs = block size */
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* Hash-table variant: storage scales with the local off-diagonal width,
     not the global block dimension.  NOTE(review): the initial table size
     baij->nbs/5 looks like a heuristic hint only -- confirm PetscTableCreate
     grows the table on demand. */
  ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    /* both key and value are stored shifted by +1 so that 0 can mean "absent" */
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: one int per global block column (non-scalable).
     colmap[g] == l*bs+1 maps global block column g to local offset l*bs;
     0 entries mean the column is not present locally. */
  baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
75 
/* Number of extra block entries added to a row when it must be enlarged */
#define CHUNKSIZE  10

/*
   Inlined (macro) version of inserting one scalar value into the "A"
   (diagonal) sequential block.  Expects the caller to have the following
   locals in scope: bs,bs2,brow,bcol,ridx,cidx,rp,ap,bap,rmax,nrow,low,high,
   t,_i,ii,N,ierr, plus aj,ai,aa,aimax,ailen (aliases of a->...), a, and A.
   (row,col) are *scalar* indices; they are converted to a block index
   (brow,bcol) plus an intra-block offset (ridx,cidx).  Values within a
   block are stored column-major (offset bs*cidx + ridx).
   A short binary search narrows the window, then a linear scan finds the
   insertion point.  If the row is full the whole matrix storage is
   reallocated with CHUNKSIZE extra slots for this row (single malloc
   holding a, j, i arrays).  a->nonew == 1 silently drops new nonzeros;
   -1/-2 raise an error on any new nonzero.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                          len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
155 
/*
   Identical in structure to MatSetValues_SeqBAIJ_A_Private() above but
   operating on the "B" (off-diagonal) sequential block: it uses
   bj,bi,ba,bimax,bilen,b,B instead of the a*/A aliases and its own
   b_noinsert label.  Kept as a textual twin of the A macro; any change
   made to one should be mirrored in the other.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
233 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   Inserts/adds scalar-indexed values into the parallel BAIJ matrix.
   Locally owned rows go straight into the A (diagonal) or B (off-diagonal)
   sequential block via the inlined macros above; rows owned by other
   processes are stashed for communication at assembly time.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  /* ownership range in *scalar* (not block) indices */
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  /* scratch variables captured by name inside the two insertion macros */
  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column belongs to the off-diagonal block B */
          if (mat->was_assembled) {
            /* after assembly, B is in compressed (local) column numbering,
               so translate the global column through the colmap */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B: fall back to global numbering
                 by disassembling, which reallocates B */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
316 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   Inserts/adds bs x bs blocks of values; im[]/in[] are *block* indices.
   Blocks are repacked (if needed) into the contiguous scratch buffer
   baij->barray before being passed to the sequential SetValuesBlocked.
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  /* ownership range in block indices */
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block, reused across calls */
  if(!barray) {
    baij->barray = barray = (Scalar*)PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  /* distance between consecutive rows (resp. columns) of one block
     inside the caller-supplied array v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block-row indices are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;          /* block is already contiguous in v */
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;          /* block is already contiguous in v */
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided bs x bs block into the contiguous buffer */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* block column in the off-diagonal part B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* sanity check: colmap entries for block columns must be
               block-aligned (stored value is l*bs+1) */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B: revert to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash into the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
/* multiplicative hashing constant (golden-ratio fraction) */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* hash a positive integer key into [0,size); tmp holds the fractional part */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValues: assumes the nonzero structure is
   already fixed and recorded in the hash table built by
   MatCreateHashTable_MPIBAIJ_Private(); only existing entries may be set.
   Lookup is linear probing from the hashed slot, wrapping once.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  /* probe-count statistics, kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        /* key encodes the (block-row, block-column) pair, shifted by +1
           so that 0 marks an empty slot */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* probe forward to the end of the table, then wrap to the front */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        /* HD[idx] points at the start of the bs x bs block (column-major) */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-process row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
496 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValuesBlocked: im[]/in[] are block indices
   and v supplies bs x bs blocks.  Like MatSetValues_MPIBAIJ_HT() above,
   only entries already present in the prebuilt hash table may be set.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  /* probe-count statistics, kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance between consecutive rows (resp. columns) of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        /* key encodes the (block-row, block-column) pair, +1 so 0 = empty */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* probe forward to the end of the table, then wrap to the front */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* baij_a points at the destination bs x bs block in matrix storage */
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* scatter the row-oriented source block into column-major storage:
             jj walks one storage column (stride bs) per source row */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          /* column-oriented source: columns copy contiguously */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-process block row: stash into the block stash */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
610 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves values at scalar indices (idxm,idxn) into v (row-major,
   v[i*n+j]).  Only locally owned rows are supported; columns not stored
   locally read back as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  /* ownership ranges converted from block to scalar indices */
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column in the off-diagonal block: translate through colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 or a garray mismatch means the column has no local
             storage, so the entry is (implicitly) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
654 
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   Computes a matrix norm.  Only NORM_FROBENIUS is implemented for the
   parallel case (single-process matrices delegate to the sequential code,
   which may support more norms).
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int        ierr,i,bs2=baij->bs2;
  PetscReal  sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* uniprocessor: the whole matrix lives in the A block */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared moduli over both local blocks; nz counts blocks,
         so nz*bs2 is the number of stored scalar entries */
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* global reduction of the squared sums, then one square root */
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
694 
695 
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
703 #undef __FUNC__
704 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
705 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
706 {
707   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
708   Mat         A = baij->A,B=baij->B;
709   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
710   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
711   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
712   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
713   int         *HT,key;
714   Scalar      **HD;
715   PetscReal   tmp;
716 #if defined(PETSC_USE_BOPT_g)
717   int         ct=0,max=0;
718 #endif
719 
720   PetscFunctionBegin;
721   baij->ht_size=(int)(factor*nz);
722   size = baij->ht_size;
723 
724   if (baij->ht) {
725     PetscFunctionReturn(0);
726   }
727 
728   /* Allocate Memory for Hash Table */
729   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
730   baij->ht = (int*)(baij->hd + size);
731   HD = baij->hd;
732   HT = baij->ht;
733 
734 
735   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
736 
737 
738   /* Loop Over A */
739   for (i=0; i<a->mbs; i++) {
740     for (j=ai[i]; j<ai[i+1]; j++) {
741       row = i+rstart;
742       col = aj[j]+cstart;
743 
744       key = row*Nbs + col + 1;
745       h1  = HASH(size,key,tmp);
746       for (k=0; k<size; k++){
747         if (HT[(h1+k)%size] == 0.0) {
748           HT[(h1+k)%size] = key;
749           HD[(h1+k)%size] = a->a + j*bs2;
750           break;
751 #if defined(PETSC_USE_BOPT_g)
752         } else {
753           ct++;
754 #endif
755         }
756       }
757 #if defined(PETSC_USE_BOPT_g)
758       if (k> max) max = k;
759 #endif
760     }
761   }
762   /* Loop Over B */
763   for (i=0; i<b->mbs; i++) {
764     for (j=bi[i]; j<bi[i+1]; j++) {
765       row = i+rstart;
766       col = garray[bj[j]];
767       key = row*Nbs + col + 1;
768       h1  = HASH(size,key,tmp);
769       for (k=0; k<size; k++){
770         if (HT[(h1+k)%size] == 0.0) {
771           HT[(h1+k)%size] = key;
772           HD[(h1+k)%size] = b->a + j*bs2;
773           break;
774 #if defined(PETSC_USE_BOPT_g)
775         } else {
776           ct++;
777 #endif
778         }
779       }
780 #if defined(PETSC_USE_BOPT_g)
781       if (k> max) max = k;
782 #endif
783     }
784   }
785 
786   /* Print Summary */
787 #if defined(PETSC_USE_BOPT_g)
788   for (i=0,j=0; i<size; i++) {
789     if (HT[i]) {j++;}
790   }
791   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
792 #endif
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNC__
797 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
798 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
799 {
800   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
801   int         ierr,nstash,reallocs;
802   InsertMode  addv;
803 
804   PetscFunctionBegin;
805   if (baij->donotstash) {
806     PetscFunctionReturn(0);
807   }
808 
809   /* make sure all processors are either in INSERTMODE or ADDMODE */
810   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
811   if (addv == (ADD_VALUES|INSERT_VALUES)) {
812     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
813   }
814   mat->insertmode = addv; /* in case this processor had no cache */
815 
816   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
817   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
818   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
819   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822   PetscFunctionReturn(0);
823 }
824 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes parallel assembly: drains the point stash and the block stash
   of values destined for this processor, assembles the two local
   sequential blocks, handles disassembly/reassembly when the nonzero
   structure may have changed on some processor, and optionally installs
   the hash-table based MatSetValues routines.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* receive and insert the stashed single (unblocked) values */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block row carries bs2 scalars, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly also builds the scatter context for MatMult etc. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, create the hash table and switch to the hash-table
     based MatSetValues routines for subsequent insertions */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release work space allocated by MatGetRow() */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
923 
924 /*
925 #undef __FUNC__
926 #define __FUNC__ "MatView_MPIBAIJ_Binary"
927 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
928 {
929   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
930   int          ierr;
931 
932   PetscFunctionBegin;
933   if (baij->size == 1) {
934     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
935   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
936   PetscFunctionReturn(0);
937 }
938 */
939 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewing kernel shared by the ASCII, draw and socket viewers.  For the
   ASCII info formats it prints per-processor statistics and returns; for
   other cases, on more than one processor the entire matrix is gathered
   onto processor 0 and viewed there via a singleton viewer.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
  int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* nz counts are in blocks; multiplied by bs to report point entries */
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    /* nothing to draw with a null draw context */
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* processor 0 owns every row of the gathered matrix; others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block per call */
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns back to global columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1044 
1045 
1046 
1047 #undef __FUNC__
1048 #define __FUNC__ "MatView_MPIBAIJ"
1049 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1050 {
1051   int        ierr;
1052   PetscTruth isascii,isdraw,issocket,isbinary;
1053 
1054   PetscFunctionBegin;
1055   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1056   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1057   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1059   if (isascii || isdraw || issocket || isbinary) {
1060     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1061   } else {
1062     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1063   }
1064   PetscFunctionReturn(0);
1065 }
1066 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   Destroys an MPIBAIJ matrix: releases the local-to-global mappings,
   row/column maps, stashes, the two sequential blocks, the communication
   data structures and finally the matrix header itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on configuration */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1114 
1115 #undef __FUNC__
1116 #define __FUNC__ "MatMult_MPIBAIJ"
1117 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1118 {
1119   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1120   int         ierr,nt;
1121 
1122   PetscFunctionBegin;
1123   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1124   if (nt != a->n) {
1125     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1126   }
1127   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1128   if (nt != a->m) {
1129     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1130   }
1131   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1132   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1133   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1134   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1135   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNC__
1140 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1141 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1142 {
1143   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1144   int        ierr;
1145 
1146   PetscFunctionBegin;
1147   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1148   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1149   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1150   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNC__
1155 #define __FUNC__ "MatMultTranspose_MPIBAIJ"
1156 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1157 {
1158   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1159   int         ierr;
1160 
1161   PetscFunctionBegin;
1162   /* do nondiagonal part */
1163   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1164   /* send it on its way */
1165   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1166   /* do local part */
1167   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168   /* receive remote parts: note this assumes the values are not actually */
1169   /* inserted in yy until the next line, which is true for my implementation*/
1170   /* but is not perhaps always true. */
1171   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNC__
1176 #define __FUNC__ "MatMultTransposeAdd_MPIBAIJ"
1177 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1178 {
1179   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1180   int         ierr;
1181 
1182   PetscFunctionBegin;
1183   /* do nondiagonal part */
1184   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1185   /* send it on its way */
1186   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1187   /* do local part */
1188   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1189   /* receive remote parts: note this assumes the values are not actually */
1190   /* inserted in yy until the next line, which is true for my implementation*/
1191   /* but is not perhaps always true. */
1192   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 /*
1197   This only works correctly for square matrices where the subblock A->A is the
1198    diagonal block
1199 */
1200 #undef __FUNC__
1201 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1202 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1203 {
1204   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1205   int         ierr;
1206 
1207   PetscFunctionBegin;
1208   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1209   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNC__
1214 #define __FUNC__ "MatScale_MPIBAIJ"
1215 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1216 {
1217   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1218   int         ierr;
1219 
1220   PetscFunctionBegin;
1221   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1222   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNC__
1227 #define __FUNC__ "MatGetSize_MPIBAIJ"
1228 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1229 {
1230   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1231 
1232   PetscFunctionBegin;
1233   if (m) *m = mat->M;
1234   if (n) *n = mat->N;
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNC__
1239 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1240 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1241 {
1242   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1243 
1244   PetscFunctionBegin;
1245   *m = mat->m; *n = mat->n;
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNC__
1250 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1251 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1252 {
1253   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1254 
1255   PetscFunctionBegin;
1256   if (m) *m = mat->rstart*mat->bs;
1257   if (n) *n = mat->rend*mat->bs;
1258   PetscFunctionReturn(0);
1259 }
1260 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns one (local) row of the matrix by merging the corresponding rows
   of the diagonal block A and off-diagonal block B, with column indices
   translated to global numbering and sorted in increasing order.  Only
   rows owned by this processor may be requested.  MatRestoreRow() must be
   called before the next MatGetRow().
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int        *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and the index work arrays;
       freed later in MatAssemblyEnd()/MatDestroy() */
    mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* pass null pointers for outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark is the split point in B's row: entries before imark have
         global columns left of the diagonal block, the rest lie right */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* imark not yet computed (v was null); find it here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1341 
1342 #undef __FUNC__
1343 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1344 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1345 {
1346   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1347 
1348   PetscFunctionBegin;
1349   if (baij->getrowactive == PETSC_FALSE) {
1350     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1351   }
1352   baij->getrowactive = PETSC_FALSE;
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNC__
1357 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1358 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1359 {
1360   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1361 
1362   PetscFunctionBegin;
1363   *bs = baij->bs;
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNC__
1368 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1369 int MatZeroEntries_MPIBAIJ(Mat A)
1370 {
1371   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1372   int         ierr;
1373 
1374   PetscFunctionBegin;
1375   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1376   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNC__
1381 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1382 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1383 {
1384   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1385   Mat         A = a->A,B = a->B;
1386   int         ierr;
1387   PetscReal   isend[5],irecv[5];
1388 
1389   PetscFunctionBegin;
1390   info->block_size     = (double)a->bs;
1391   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1392   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1393   isend[3] = info->memory;  isend[4] = info->mallocs;
1394   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1395   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1396   isend[3] += info->memory;  isend[4] += info->mallocs;
1397   if (flag == MAT_LOCAL) {
1398     info->nz_used      = isend[0];
1399     info->nz_allocated = isend[1];
1400     info->nz_unneeded  = isend[2];
1401     info->memory       = isend[3];
1402     info->mallocs      = isend[4];
1403   } else if (flag == MAT_GLOBAL_MAX) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   } else if (flag == MAT_GLOBAL_SUM) {
1411     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1412     info->nz_used      = irecv[0];
1413     info->nz_allocated = irecv[1];
1414     info->nz_unneeded  = irecv[2];
1415     info->memory       = irecv[3];
1416     info->mallocs      = irecv[4];
1417   } else {
1418     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1419   }
1420   info->rows_global       = (double)a->M;
1421   info->columns_global    = (double)a->N;
1422   info->rows_local        = (double)a->m;
1423   info->columns_local     = (double)a->N;
1424   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1425   info->fill_ratio_needed = 0;
1426   info->factor_mallocs    = 0;
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNC__
1431 #define __FUNC__ "MatSetOption_MPIBAIJ"
1432 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1433 {
1434   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1435   int         ierr;
1436 
1437   PetscFunctionBegin;
1438   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1439       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1440       op == MAT_COLUMNS_UNSORTED ||
1441       op == MAT_COLUMNS_SORTED ||
1442       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1443       op == MAT_KEEP_ZEROED_ROWS ||
1444       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1445         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1446         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1447   } else if (op == MAT_ROW_ORIENTED) {
1448         a->roworiented = PETSC_TRUE;
1449         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1450         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1451   } else if (op == MAT_ROWS_SORTED ||
1452              op == MAT_ROWS_UNSORTED ||
1453              op == MAT_SYMMETRIC ||
1454              op == MAT_STRUCTURALLY_SYMMETRIC ||
1455              op == MAT_YES_NEW_DIAGONALS ||
1456              op == MAT_USE_HASH_TABLE) {
1457     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1458   } else if (op == MAT_COLUMN_ORIENTED) {
1459     a->roworiented = PETSC_FALSE;
1460     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1461     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1462   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1463     a->donotstash = PETSC_TRUE;
1464   } else if (op == MAT_NO_NEW_DIAGONALS) {
1465     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1466   } else if (op == MAT_USE_HASH_TABLE) {
1467     a->ht_flag = PETSC_TRUE;
1468   } else {
1469     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1470   }
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNC__
1475 #define __FUNC__ "MatTranspose_MPIBAIJ("
1476 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1477 {
1478   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1479   Mat_SeqBAIJ *Aloc;
1480   Mat        B;
1481   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1482   int        bs=baij->bs,mbs=baij->mbs;
1483   Scalar     *a;
1484 
1485   PetscFunctionBegin;
1486   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1487   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1488 
1489   /* copy over the A part */
1490   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1491   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1492   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1493 
1494   for (i=0; i<mbs; i++) {
1495     rvals[0] = bs*(baij->rstart + i);
1496     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1497     for (j=ai[i]; j<ai[i+1]; j++) {
1498       col = (baij->cstart+aj[j])*bs;
1499       for (k=0; k<bs; k++) {
1500         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1501         col++; a += bs;
1502       }
1503     }
1504   }
1505   /* copy over the B part */
1506   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1507   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1508   for (i=0; i<mbs; i++) {
1509     rvals[0] = bs*(baij->rstart + i);
1510     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1511     for (j=ai[i]; j<ai[i+1]; j++) {
1512       col = baij->garray[aj[j]]*bs;
1513       for (k=0; k<bs; k++) {
1514         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1515         col++; a += bs;
1516       }
1517     }
1518   }
1519   ierr = PetscFree(rvals);CHKERRQ(ierr);
1520   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1521   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1522 
1523   if (matout) {
1524     *matout = B;
1525   } else {
1526     PetscOps *Abops;
1527     MatOps   Aops;
1528 
1529     /* This isn't really an in-place transpose .... but free data structures from baij */
1530     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1531     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1532     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1533 #if defined (PETSC_USE_CTABLE)
1534     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1535 #else
1536     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1537 #endif
1538     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1539     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1540     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1541     ierr = PetscFree(baij);CHKERRQ(ierr);
1542 
1543     /*
1544        This is horrible, horrible code. We need to keep the
1545       A pointers for the bops and ops but copy everything
1546       else from C.
1547     */
1548     Abops   = A->bops;
1549     Aops    = A->ops;
1550     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1551     A->bops = Abops;
1552     A->ops  = Aops;
1553 
1554     PetscHeaderDestroy(B);
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   Computes A = diag(ll) * A * diag(rr) (either vector may be PETSC_NULL).
   Right scaling of the off-diagonal block needs the off-processor entries
   of rr, so their scatter is overlapped with the left scaling and the
   scaling of the diagonal block.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    /* left scaling of the off-diagonal block is purely local */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1591 
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (global, unblocked) rows of A listed in
   the index set is; if diag is non-null the diagonal entry of each zeroed
   row is set to *diag.  A listed row may be owned by another processor,
   so the row indices are first routed to their owning processes with
   point-to-point messages; each process then zeros the rows it owns.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = number of rows destined for proc j; procs[j] = 0/1 flag
     that at least one row goes to proc j (shares the same allocation) */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    /* owners[] holds block-row ownership ranges; scale by bs to compare
       against the unblocked row index idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  /* after PetscMaxSum_Op: work[rank] = max message length destined for
     this process, work[size+rank] = number of messages it will receive */
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the row indices grouped by destination process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* the packing loop advanced starts[]; rebuild it before posting sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first unblocked row owned by this process */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* gather every received row index, converted to local numbering */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* non-square diagonal block: zero first, then insert the diagonal
       entries one at a time; this requires new nonzero locations, which
       must therefore be allowed */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1743 
1744 #undef __FUNC__
1745 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1746 int MatPrintHelp_MPIBAIJ(Mat A)
1747 {
1748   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1749   MPI_Comm    comm = A->comm;
1750   static int  called = 0;
1751   int         ierr;
1752 
1753   PetscFunctionBegin;
1754   if (!a->rank) {
1755     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1756   }
1757   if (called) {PetscFunctionReturn(0);} else called = 1;
1758   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1759   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 #undef __FUNC__
1764 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1765 int MatSetUnfactored_MPIBAIJ(Mat A)
1766 {
1767   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1768   int         ierr;
1769 
1770   PetscFunctionBegin;
1771   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1776 
1777 #undef __FUNC__
1778 #define __FUNC__ "MatEqual_MPIBAIJ"
1779 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1780 {
1781   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1782   Mat         a,b,c,d;
1783   PetscTruth  flg;
1784   int         ierr;
1785 
1786   PetscFunctionBegin;
1787   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1788   a = matA->A; b = matA->B;
1789   c = matB->A; d = matB->B;
1790 
1791   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1792   if (flg == PETSC_TRUE) {
1793     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1794   }
1795   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
/* -------------------------------------------------------------------*/
/*
   Function (dispatch) table for the MATMPIBAIJ matrix type.  The
   initializer is positional -- the slot order is fixed by struct
   _MatOps (declared in the Mat implementation header) -- so entries
   must not be reordered.  A 0 entry means the corresponding operation
   is not implemented for this format.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTranspose_MPIBAIJ,
  MatMultTransposeAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1867 
1868 
1869 EXTERN_C_BEGIN
1870 #undef __FUNC__
1871 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1872 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1873 {
1874   PetscFunctionBegin;
1875   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1876   *iscopy = PETSC_FALSE;
1877   PetscFunctionReturn(0);
1878 }
1879 EXTERN_C_END
1880 
1881 #undef __FUNC__
1882 #define __FUNC__ "MatCreateMPIBAIJ"
1883 /*@C
1884    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1885    (block compressed row).  For good matrix assembly performance
1886    the user should preallocate the matrix storage by setting the parameters
1887    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1888    performance can be increased by more than a factor of 50.
1889 
1890    Collective on MPI_Comm
1891 
1892    Input Parameters:
1893 +  comm - MPI communicator
.  bs   - size of block
1895 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1896            This value should be the same as the local size used in creating the
1897            y vector for the matrix-vector product y = Ax.
1898 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1899            This value should be the same as the local size used in creating the
1900            x vector for the matrix-vector product y = Ax.
1901 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1902 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1903 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1904            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1908 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1909            submatrix (same for all local rows).
1910 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1911            off-diagonal portion of the local submatrix (possibly different for
1912            each block row) or PETSC_NULL.
1913 
1914    Output Parameter:
1915 .  A - the matrix
1916 
1917    Options Database Keys:
1918 .   -mat_no_unroll - uses code that does not unroll the loops in the
1919                      block calculations (much slower)
1920 .   -mat_block_size - size of the blocks to use
1921 .   -mat_mpi - use the parallel matrix data structures even on one processor
1922                (defaults to using SeqBAIJ format on one processor)
1923 
1924    Notes:
1925    The user MUST specify either the local or global matrix dimensions
1926    (possibly both).
1927 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1930 
1931    Storage Information:
1932    For a square global matrix we define each processor's diagonal portion
1933    to be its local rows and the corresponding columns (a square submatrix);
1934    each processor's off-diagonal portion encompasses the remainder of the
1935    local matrix (a rectangular submatrix).
1936 
1937    The user can specify preallocated storage for the diagonal part of
1938    the local submatrix with either d_nz or d_nnz (not both).  Set
1939    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1940    memory allocation.  Likewise, specify preallocated storage for the
1941    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1942 
1943    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1944    the figure below we depict these three local rows and all columns (0-11).
1945 
1946 .vb
1947            0 1 2 3 4 5 6 7 8 9 10 11
1948           -------------------
1949    row 3  |  o o o d d d o o o o o o
1950    row 4  |  o o o d d d o o o o o o
1951    row 5  |  o o o d d d o o o o o o
1952           -------------------
1953 .ve
1954 
1955    Thus, any entries in the d locations are stored in the d (diagonal)
1956    submatrix, and any entries in the o locations are stored in the
1957    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1958    stored simply in the MATSEQBAIJ format for compressed row storage.
1959 
1960    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1961    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1962    In general, for PDE problems in which most nonzeros are near the diagonal,
1963    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1964    or you will get TERRIBLE performance; see the users' manual chapter on
1965    matrices.
1966 
1967    Level: intermediate
1968 
1969 .keywords: matrix, block, aij, compressed row, sparse, parallel
1970 
1971 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1972 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
  PetscTruth   flag1,flag2,flg;

  PetscFunctionBegin;
  /* allow the block size to be overridden from the options database */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* validate block size and the preallocation arguments */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* on a single process fall back to the sequential BAIJ format unless
     the user explicitly requested the parallel data structures */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* consistency of the local/global size arguments */
  if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes win and
     the global sizes are recomputed from them below */
  if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* derive the missing sizes: global sizes by summing the local sizes,
     local sizes by splitting the global block count across processes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    /* distribute the remainder blocks one per process among the first ranks */
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* store the sizes both on the Mat header and the implementation struct */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* three tables of length size+2 share one allocation: rowners (block
     rows), cowners (block columns), rowners_bs (unblocked rows) */
  b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  /* prefix sum: rowners[i] = first block row owned by process i */
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the local diagonal (m x n) and off-diagonal (m x N) submatrices */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optionally enable hash-table based assembly; factors <= 1.0 fall
     back to the default 1.39 */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the type-specific operations reachable by name lookup */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2161 
2162 #undef __FUNC__
2163 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2164 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2165 {
2166   Mat         mat;
2167   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2168   int         ierr,len=0;
2169   PetscTruth  flg;
2170 
2171   PetscFunctionBegin;
2172   *newmat       = 0;
2173   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2174   PLogObjectCreate(mat);
2175   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2176   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2177   mat->ops->destroy = MatDestroy_MPIBAIJ;
2178   mat->ops->view    = MatView_MPIBAIJ;
2179   mat->factor       = matin->factor;
2180   mat->assembled    = PETSC_TRUE;
2181   mat->insertmode   = NOT_SET_VALUES;
2182 
2183   a->m = mat->m   = oldmat->m;
2184   a->n = mat->n   = oldmat->n;
2185   a->M = mat->M   = oldmat->M;
2186   a->N = mat->N   = oldmat->N;
2187 
2188   a->bs  = oldmat->bs;
2189   a->bs2 = oldmat->bs2;
2190   a->mbs = oldmat->mbs;
2191   a->nbs = oldmat->nbs;
2192   a->Mbs = oldmat->Mbs;
2193   a->Nbs = oldmat->Nbs;
2194 
2195   a->rstart       = oldmat->rstart;
2196   a->rend         = oldmat->rend;
2197   a->cstart       = oldmat->cstart;
2198   a->cend         = oldmat->cend;
2199   a->size         = oldmat->size;
2200   a->rank         = oldmat->rank;
2201   a->donotstash   = oldmat->donotstash;
2202   a->roworiented  = oldmat->roworiented;
2203   a->rowindices   = 0;
2204   a->rowvalues    = 0;
2205   a->getrowactive = PETSC_FALSE;
2206   a->barray       = 0;
2207   a->rstart_bs    = oldmat->rstart_bs;
2208   a->rend_bs      = oldmat->rend_bs;
2209   a->cstart_bs    = oldmat->cstart_bs;
2210   a->cend_bs      = oldmat->cend_bs;
2211 
2212   /* hash table stuff */
2213   a->ht           = 0;
2214   a->hd           = 0;
2215   a->ht_size      = 0;
2216   a->ht_flag      = oldmat->ht_flag;
2217   a->ht_fact      = oldmat->ht_fact;
2218   a->ht_total_ct  = 0;
2219   a->ht_insert_ct = 0;
2220 
2221 
2222   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2223   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2224   a->cowners    = a->rowners + a->size + 2;
2225   a->rowners_bs = a->cowners + a->size + 2;
2226   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2227   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2228   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2229   if (oldmat->colmap) {
2230 #if defined (PETSC_USE_CTABLE)
2231   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2232 #else
2233     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2234     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2235     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2236 #endif
2237   } else a->colmap = 0;
2238   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2239     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2240     PLogObjectMemory(mat,len*sizeof(int));
2241     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2242   } else a->garray = 0;
2243 
2244   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2245   PLogObjectParent(mat,a->lvec);
2246   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2247 
2248   PLogObjectParent(mat,a->Mvctx);
2249   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2250   PLogObjectParent(mat,a->A);
2251   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2252   PLogObjectParent(mat,a->B);
2253   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2254   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2255   if (flg) {
2256     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2257   }
2258   *newmat = mat;
2259   PetscFunctionReturn(0);
2260 }
2261 
2262 #include "sys.h"
2263 
2264 #undef __FUNC__
2265 #define __FUNC__ "MatLoad_MPIBAIJ"
2266 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2267 {
2268   Mat          A;
2269   int          i,nz,ierr,j,rstart,rend,fd;
2270   Scalar       *vals,*buf;
2271   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2272   MPI_Status   status;
2273   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2274   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2275   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2276   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2277   int          dcount,kmax,k,nzcount,tmp;
2278 
2279   PetscFunctionBegin;
2280   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2281 
2282   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2283   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2284   if (!rank) {
2285     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2286     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2287     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2288     if (header[3] < 0) {
2289       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2290     }
2291   }
2292 
2293   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2294   M = header[1]; N = header[2];
2295 
2296   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2297 
2298   /*
2299      This code adds extra rows to make sure the number of rows is
2300      divisible by the blocksize
2301   */
2302   Mbs        = M/bs;
2303   extra_rows = bs - M + bs*(Mbs);
2304   if (extra_rows == bs) extra_rows = 0;
2305   else                  Mbs++;
2306   if (extra_rows &&!rank) {
2307     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2308   }
2309 
2310   /* determine ownership of all rows */
2311   mbs = Mbs/size + ((Mbs % size) > rank);
2312   m   = mbs * bs;
2313   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2314   browners = rowners + size + 1;
2315   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2316   rowners[0] = 0;
2317   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2318   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2319   rstart = rowners[rank];
2320   rend   = rowners[rank+1];
2321 
2322   /* distribute row lengths to all processors */
2323   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2324   if (!rank) {
2325     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2326     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2327     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2328     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2329     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2330     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2331     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2332   } else {
2333     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2334   }
2335 
2336   if (!rank) {
2337     /* calculate the number of nonzeros on each processor */
2338     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2339     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2340     for (i=0; i<size; i++) {
2341       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2342         procsnz[i] += rowlengths[j];
2343       }
2344     }
2345     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2346 
2347     /* determine max buffer needed and allocate it */
2348     maxnz = 0;
2349     for (i=0; i<size; i++) {
2350       maxnz = PetscMax(maxnz,procsnz[i]);
2351     }
2352     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2353 
2354     /* read in my part of the matrix column indices  */
2355     nz = procsnz[0];
2356     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2357     mycols = ibuf;
2358     if (size == 1)  nz -= extra_rows;
2359     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2360     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2361 
2362     /* read in every ones (except the last) and ship off */
2363     for (i=1; i<size-1; i++) {
2364       nz   = procsnz[i];
2365       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2366       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2367     }
2368     /* read in the stuff for the last proc */
2369     if (size != 1) {
2370       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2371       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2372       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2373       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2374     }
2375     ierr = PetscFree(cols);CHKERRQ(ierr);
2376   } else {
2377     /* determine buffer space needed for message */
2378     nz = 0;
2379     for (i=0; i<m; i++) {
2380       nz += locrowlens[i];
2381     }
2382     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2383     mycols = ibuf;
2384     /* receive message of column indices*/
2385     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2386     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2387     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2388   }
2389 
2390   /* loop over local rows, determining number of off diagonal entries */
2391   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2392   odlens = dlens + (rend-rstart);
2393   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2394   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2395   masked1 = mask    + Mbs;
2396   masked2 = masked1 + Mbs;
2397   rowcount = 0; nzcount = 0;
2398   for (i=0; i<mbs; i++) {
2399     dcount  = 0;
2400     odcount = 0;
2401     for (j=0; j<bs; j++) {
2402       kmax = locrowlens[rowcount];
2403       for (k=0; k<kmax; k++) {
2404         tmp = mycols[nzcount++]/bs;
2405         if (!mask[tmp]) {
2406           mask[tmp] = 1;
2407           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2408           else masked1[dcount++] = tmp;
2409         }
2410       }
2411       rowcount++;
2412     }
2413 
2414     dlens[i]  = dcount;
2415     odlens[i] = odcount;
2416 
2417     /* zero out the mask elements we set */
2418     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2419     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2420   }
2421 
2422   /* create our matrix */
2423   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2424   A = *newmat;
2425   MatSetOption(A,MAT_COLUMNS_SORTED);
2426 
2427   if (!rank) {
2428     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2429     /* read in my part of the matrix numerical values  */
2430     nz = procsnz[0];
2431     vals = buf;
2432     mycols = ibuf;
2433     if (size == 1)  nz -= extra_rows;
2434     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2435     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2436 
2437     /* insert into matrix */
2438     jj      = rstart*bs;
2439     for (i=0; i<m; i++) {
2440       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2441       mycols += locrowlens[i];
2442       vals   += locrowlens[i];
2443       jj++;
2444     }
2445     /* read in other processors (except the last one) and ship out */
2446     for (i=1; i<size-1; i++) {
2447       nz   = procsnz[i];
2448       vals = buf;
2449       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2450       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2451     }
2452     /* the last proc */
2453     if (size != 1){
2454       nz   = procsnz[i] - extra_rows;
2455       vals = buf;
2456       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2457       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2458       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2459     }
2460     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2461   } else {
2462     /* receive numeric values */
2463     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2464 
2465     /* receive message of values*/
2466     vals   = buf;
2467     mycols = ibuf;
2468     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2469     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2470     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2471 
2472     /* insert into matrix */
2473     jj      = rstart*bs;
2474     for (i=0; i<m; i++) {
2475       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2476       mycols += locrowlens[i];
2477       vals   += locrowlens[i];
2478       jj++;
2479     }
2480   }
2481   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2482   ierr = PetscFree(buf);CHKERRQ(ierr);
2483   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2484   ierr = PetscFree(rowners);CHKERRQ(ierr);
2485   ierr = PetscFree(dlens);CHKERRQ(ierr);
2486   ierr = PetscFree(mask);CHKERRQ(ierr);
2487   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2488   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNC__
2493 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2494 /*@
2495    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2496 
2497    Input Parameters:
2498 .  mat  - the matrix
2499 .  fact - factor
2500 
2501    Collective on Mat
2502 
2503    Level: advanced
2504 
2505   Notes:
2506    This can also be set by the command line option: -mat_use_hash_table fact
2507 
2508 .keywords: matrix, hashtable, factor, HT
2509 
2510 .seealso: MatSetOption()
2511 @*/
2512 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2513 {
2514   Mat_MPIBAIJ *baij;
2515 
2516   PetscFunctionBegin;
2517   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2518   if (mat->type != MATMPIBAIJ) {
2519     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2520   }
2521   baij = (Mat_MPIBAIJ*)mat->data;
2522   baij->ht_fact = fact;
2523   PetscFunctionReturn(0);
2524 }
2525