xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a3b388050d4e8a4c38128e174a7572fa24ee1ba5)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.173 1999/06/15 15:31:01 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
48 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
/*
   Builds baij->colmap: for each global block column that appears in the
   off-diagonal part B, records (local block index)*bs + 1.  The +1
   shift lets 0 serve as the "block column not present locally"
   sentinel in the dense-array variant.
*/
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;  /* off-diagonal block */
  int         nbs = B->nbs,i,bs=B->bs,ierr;       /* nbs = # block columns in B */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash-table variant: memory proportional to the number of local
     off-diagonal block columns rather than the global block size */
  ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    /* both key and value are shifted by +1 so 0 can mean "absent" */
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: one int per global block column (non-scalable) */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  /* garray[i] is the global block column of B's local block column i */
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
77 
#define CHUNKSIZE  10  /* number of extra nonzero blocks allocated whenever a row runs out of room during insertion */
79 
/*
   Inserts or adds the scalar "value" at scalar position (row,col) of
   the local diagonal block A (sequential BAIJ storage).  Uses the
   scratch variables declared in MatSetValues_MPIBAIJ (rp, ap, rmax,
   nrow, low, high, t, _i, ii, N, brow, bcol, ridx, cidx, bap).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  /* block row containing "row" */ \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; /* column indices / values of this block row */ \
    rmax = aimax[brow]; nrow = ailen[brow]; /* allocated vs used length of the row */ \
      bcol = col/bs; /* block column containing "col" */ \
      ridx = row % bs; cidx = col % bs; /* position within the bs x bs block (column-major) */ \
      low = 0; high = nrow; \
      /* binary search narrows [low,high) to at most 3 candidates */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { /* block already present: update in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; /* nonew==1: silently drop new nonzeros */ \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: values, column indices and row pointers in ONE allocation */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows past brow shift by CHUNKSIZE */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); /* entries after the gap */ \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                          len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        /* zero the freshly inserted gap so new blocks start clean */ \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; /* re-derive row pointers into new storage */ \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; /* NOTE(review): nz is bumped only on this realloc path, not on every insertion below -- verify against the SeqBAIJ bookkeeping convention */ \
      } \
      N = nrow++ - 1;  /* N = index of last used slot; row grows by one */ \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
157 
/*
   Same insertion logic as MatSetValues_SeqBAIJ_A_Private, but for the
   local off-diagonal block B (fields b, bi, bj, ba, bimax, bilen).
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  /* block row containing "row" */ \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; /* column indices / values of this block row */ \
    rmax = bimax[brow]; nrow = bilen[brow]; /* allocated vs used length of the row */ \
      bcol = col/bs; /* block column containing "col" */ \
      ridx = row % bs; cidx = col % bs; /* position within the bs x bs block (column-major) */ \
      low = 0; high = nrow; \
      /* binary search narrows [low,high) to at most 3 candidates */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { /* block already present: update in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; /* nonew==1: silently drop new nonzeros */ \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: values, column indices and row pointers in ONE allocation */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots; rows past brow shift by CHUNKSIZE */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); /* entries after the gap */ \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        /* zero the freshly inserted gap so new blocks start clean */ \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; /* re-derive row pointers into new storage */ \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; /* NOTE(review): nz is bumped only on this realloc path, not on every insertion below -- verify against the SeqBAIJ bookkeeping convention */ \
      } \
      N = nrow++ - 1;  /* N = index of last used slot; row grows by one */ \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
235 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   Inserts or adds a logically dense m x n array of values v at global
   scalar-indexed rows im[] and columns in[].  Locally owned entries go
   directly into the diagonal (A) or off-diagonal (B) sequential blocks
   via the _Private macros above; rows owned by other processes are
   stashed for communication during assembly.  Negative indices are
   silently skipped.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  /* ownership ranges in scalar (not block) units */
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  /* scratch used by the MatSetValues_SeqBAIJ_{A,B}_Private macros */
  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue; /* negative rows are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* row is locally owned */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){ /* falls in diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue; /* negative columns are ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else { /* falls in off-diagonal block B */
          if (mat->was_assembled) {
            /* translate global column into B's compressed column numbering */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs; /* undo the +1 shift; absent gives col < 0 */
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* a brand-new off-diagonal block column: revert B to its
                 pre-assembly (global) numbering before inserting */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* row owned elsewhere: stash for the assembly phase */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
321 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
322 {
323   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
324   Scalar      *value,*barray=baij->barray;
325   int         ierr,i,j,ii,jj,row,col;
326   int         roworiented = baij->roworiented,rstart=baij->rstart ;
327   int         rend=baij->rend,cstart=baij->cstart,stepval;
328   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
329 
330   if(!barray) {
331     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
332   }
333 
334   if (roworiented) {
335     stepval = (n-1)*bs;
336   } else {
337     stepval = (m-1)*bs;
338   }
339   for ( i=0; i<m; i++ ) {
340     if (im[i] < 0) continue;
341 #if defined(PETSC_USE_BOPT_g)
342     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
343 #endif
344     if (im[i] >= rstart && im[i] < rend) {
345       row = im[i] - rstart;
346       for ( j=0; j<n; j++ ) {
347         /* If NumCol = 1 then a copy is not required */
348         if ((roworiented) && (n == 1)) {
349           barray = v + i*bs2;
350         } else if((!roworiented) && (m == 1)) {
351           barray = v + j*bs2;
352         } else { /* Here a copy is required */
353           if (roworiented) {
354             value = v + i*(stepval+bs)*bs + j*bs;
355           } else {
356             value = v + j*(stepval+bs)*bs + i*bs;
357           }
358           for ( ii=0; ii<bs; ii++,value+=stepval ) {
359             for (jj=0; jj<bs; jj++ ) {
360               *barray++  = *value++;
361             }
362           }
363           barray -=bs2;
364         }
365 
366         if (in[j] >= cstart && in[j] < cend){
367           col  = in[j] - cstart;
368           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
369         }
370         else if (in[j] < 0) continue;
371 #if defined(PETSC_USE_BOPT_g)
372         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
373 #endif
374         else {
375           if (mat->was_assembled) {
376             if (!baij->colmap) {
377               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
378             }
379 
380 #if defined(PETSC_USE_BOPT_g)
381 #if defined (PETSC_USE_CTABLE)
382             { int data;
383             ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
384             if((data - 1) % bs)
385 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
386             }
387 #else
388             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
389 #endif
390 #endif
391 #if defined (PETSC_USE_CTABLE)
392 	    ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
393             col  = (col - 1)/bs;
394 #else
395             col = (baij->colmap[in[j]] - 1)/bs;
396 #endif
397             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
398               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
399               col =  in[j];
400             }
401           }
402           else col = in[j];
403           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
404         }
405       }
406     } else {
407       if (!baij->donotstash) {
408         if (roworiented) {
409           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
410         } else {
411           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         }
413       }
414     }
415   }
416   PetscFunctionReturn(0);
417 }
#define HASH_KEY 0.6180339887  /* fractional part of the golden ratio -- multiplicative hashing constant */
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* maps key into [0,size): tmp receives key*HASH_KEY; (tmp-(int)tmp) is its fractional part */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValues: each (block row, block column)
   pair is located via the table built by
   MatCreateHashTable_MPIBAIJ_Private, so only EXISTING nonzero blocks
   can be set -- a missing entry is an error.  Rows owned by other
   processes are stashed as in the regular path.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value; /* HD[idx] points at the block's values */
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; /* probe statistics */
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1; /* +1 so key 0 marks an empty slot */
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probing; wrap around from the table end to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probing; wrap around from the table end to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else { /* row owned elsewhere: stash for the assembly phase */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
498 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValuesBlocked: locates each existing
   bs x bs block through the precomputed hash table and copies/adds the
   supplied values directly at HD[idx].  Only existing blocks can be
   set; rows owned by other processes are stashed into mat->bstash.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a; /* baij_a = destination block */
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; /* probe statistics */
#endif

  PetscFunctionBegin;

  /* distance (in scalars) between consecutive rows of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) { /* locally owned block row */
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1; /* +1 so key 0 marks an empty slot */
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probing; wrap around from the table end to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probing; wrap around from the table end to the front */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* transpose while copying: source rows become jj-strided writes */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          /* column-oriented input: copy rows of the block contiguously */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else { /* block row owned elsewhere: stash for the assembly phase */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
612 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves the m x n array of values at global scalar rows idxm[] and
   columns idxn[] into v (row-major).  Only rows owned by this process
   may be requested; entries in off-diagonal block columns that are not
   stored locally are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  /* ownership ranges converted from block to scalar units */
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){ /* falls in diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else { /* off-diagonal block: translate through colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --; /* undo the +1 shift; absent keys give data < 0 */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* garray cross-check guards against stale/aliased colmap entries */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
656 
657 #undef __FUNC__
658 #define __FUNC__ "MatNorm_MPIBAIJ"
659 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
660 {
661   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
662   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
663   int        ierr, i,bs2=baij->bs2;
664   double     sum = 0.0;
665   Scalar     *v;
666 
667   PetscFunctionBegin;
668   if (baij->size == 1) {
669     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
670   } else {
671     if (type == NORM_FROBENIUS) {
672       v = amat->a;
673       for (i=0; i<amat->nz*bs2; i++ ) {
674 #if defined(PETSC_USE_COMPLEX)
675         sum += PetscReal(PetscConj(*v)*(*v)); v++;
676 #else
677         sum += (*v)*(*v); v++;
678 #endif
679       }
680       v = bmat->a;
681       for (i=0; i<bmat->nz*bs2; i++ ) {
682 #if defined(PETSC_USE_COMPLEX)
683         sum += PetscReal(PetscConj(*v)*(*v)); v++;
684 #else
685         sum += (*v)*(*v); v++;
686 #endif
687       }
688       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
689       *norm = sqrt(*norm);
690     } else {
691       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
692     }
693   }
694   PetscFunctionReturn(0);
695 }
696 

/*
  Creates and populates the hash table used by the *_HT insertion
  routines.  The table is created only once.  If new entries need to be
  added to the matrix, the hash table has to be destroyed and
  recreated.
*/
705 #undef __FUNC__
706 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
707 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
708 {
709   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
710   Mat         A = baij->A, B=baij->B;
711   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
712   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
713   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
714   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
715   int         *HT,key;
716   Scalar      **HD;
717   double      tmp;
718 #if defined(PETSC_USE_BOPT_g)
719   int         ct=0,max=0;
720 #endif
721 
722   PetscFunctionBegin;
723   baij->ht_size=(int)(factor*nz);
724   size = baij->ht_size;
725 
726   if (baij->ht) {
727     PetscFunctionReturn(0);
728   }
729 
730   /* Allocate Memory for Hash Table */
731   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
732   baij->ht = (int*)(baij->hd + size);
733   HD = baij->hd;
734   HT = baij->ht;
735 
736 
737   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
738 
739 
740   /* Loop Over A */
741   for ( i=0; i<a->mbs; i++ ) {
742     for ( j=ai[i]; j<ai[i+1]; j++ ) {
743       row = i+rstart;
744       col = aj[j]+cstart;
745 
746       key = row*Nbs + col + 1;
747       h1  = HASH(size,key,tmp);
748       for ( k=0; k<size; k++ ){
749         if (HT[(h1+k)%size] == 0.0) {
750           HT[(h1+k)%size] = key;
751           HD[(h1+k)%size] = a->a + j*bs2;
752           break;
753 #if defined(PETSC_USE_BOPT_g)
754         } else {
755           ct++;
756 #endif
757         }
758       }
759 #if defined(PETSC_USE_BOPT_g)
760       if (k> max) max = k;
761 #endif
762     }
763   }
764   /* Loop Over B */
765   for ( i=0; i<b->mbs; i++ ) {
766     for ( j=bi[i]; j<bi[i+1]; j++ ) {
767       row = i+rstart;
768       col = garray[bj[j]];
769       key = row*Nbs + col + 1;
770       h1  = HASH(size,key,tmp);
771       for ( k=0; k<size; k++ ){
772         if (HT[(h1+k)%size] == 0.0) {
773           HT[(h1+k)%size] = key;
774           HD[(h1+k)%size] = b->a + j*bs2;
775           break;
776 #if defined(PETSC_USE_BOPT_g)
777         } else {
778           ct++;
779 #endif
780         }
781       }
782 #if defined(PETSC_USE_BOPT_g)
783       if (k> max) max = k;
784 #endif
785     }
786   }
787 
788   /* Print Summary */
789 #if defined(PETSC_USE_BOPT_g)
790   for ( i=0,j=0; i<size; i++)
791     if (HT[i]) {j++;}
792   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
793            (j== 0)? 0.0:((double)(ct+j))/j,max);
794 #endif
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNC__
799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
801 {
802   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
803   int         ierr,nstash,reallocs;
804   InsertMode  addv;
805 
806   PetscFunctionBegin;
807   if (baij->donotstash) {
808     PetscFunctionReturn(0);
809   }
810 
811   /* make sure all processors are either in INSERTMODE or ADDMODE */
812   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
813   if (addv == (ADD_VALUES|INSERT_VALUES)) {
814     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
815   }
816   mat->insertmode = addv; /* in case this processor had no cache */
817 
818   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
819   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
823   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
824   PetscFunctionReturn(0);
825 }
826 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes assembly of an MPIBAIJ matrix:
     - drains the point stash and the block stash (values that were set for
       rows owned by other processors) and applies them via MatSetValues;
     - assembles the local diagonal (A) and off-diagonal (B) blocks;
     - coordinates disassembly across processors so B can be rebuilt when its
       nonzero structure may have changed;
     - on the final assembly, optionally builds the hash table used by the
       fast MatSetValues_MPIBAIJ_HT path.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* receive and apply point-stash messages until none remain (flg == 0) */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block holds bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset hash-table search statistics accumulated in MatSetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on first final assembly, build the hash table if the user requested it */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space allocated by MatGetRow is no longer needed after assembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
925 
926 /*
927 #undef __FUNC__
928 #define __FUNC__ "MatView_MPIBAIJ_Binary"
929 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
930 {
931   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
932   int          ierr;
933 
934   PetscFunctionBegin;
935   if (baij->size == 1) {
936     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
937   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
938   PetscFunctionReturn(0);
939 }
940 */
941 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewer back-end for MPIBAIJ matrices.  The ASCII INFO formats print
   per-processor nonzero statistics and return early; otherwise, with more
   than one processor, the entire matrix is gathered (non-scalably) onto
   processor 0 and viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* each processor prints in turn, serialized by the sequential phase */
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* processor 0 owns all rows of the gathered copy; others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global point-row numbers of a block row */
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        /* blocks are stored column-oriented: one column of bs values per call */
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local block columns to global */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1046 
1047 
1048 
1049 #undef __FUNC__
1050 #define __FUNC__ "MatView_MPIBAIJ"
1051 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1052 {
1053   int         ierr;
1054   ViewerType  vtype;
1055 
1056   PetscFunctionBegin;
1057   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
1058   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1059       PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) {
1060     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1061     /*
1062   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1063     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1064     */
1065   } else {
1066     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   Destroys an MPIBAIJ matrix: releases the index mappings and maps, both
   stashes, the two sequential blocks, the column map, the communication
   objects and scratch arrays, then the data structure and header themselves.
   Reference counted: only the last reference actually destroys the object.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* NOTE(review): only baij->hd is freed here; baij->ht (the hash-table keys
     used by the _HT MatSetValues path) does not appear to be freed -- verify
     against the allocation in MatCreateHashTable_MPIBAIJ_Private (possible
     memory leak) */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1119 
1120 #undef __FUNC__
1121 #define __FUNC__ "MatMult_MPIBAIJ"
1122 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1123 {
1124   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1125   int         ierr, nt;
1126 
1127   PetscFunctionBegin;
1128   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1129   if (nt != a->n) {
1130     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1131   }
1132   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1133   if (nt != a->m) {
1134     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1135   }
1136   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1137   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1138   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1139   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1140   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNC__
1145 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1146 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1147 {
1148   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1149   int        ierr;
1150 
1151   PetscFunctionBegin;
1152   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1153   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1154   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1155   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNC__
1160 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1161 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1162 {
1163   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1164   int         ierr;
1165 
1166   PetscFunctionBegin;
1167   /* do nondiagonal part */
1168   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1169   /* send it on its way */
1170   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1171   /* do local part */
1172   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1173   /* receive remote parts: note this assumes the values are not actually */
1174   /* inserted in yy until the next line, which is true for my implementation*/
1175   /* but is not perhaps always true. */
1176   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNC__
1181 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1182 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1183 {
1184   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1185   int         ierr;
1186 
1187   PetscFunctionBegin;
1188   /* do nondiagonal part */
1189   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1190   /* send it on its way */
1191   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1192   /* do local part */
1193   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1194   /* receive remote parts: note this assumes the values are not actually */
1195   /* inserted in yy until the next line, which is true for my implementation*/
1196   /* but is not perhaps always true. */
1197   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 /*
1202   This only works correctly for square matrices where the subblock A->A is the
1203    diagonal block
1204 */
1205 #undef __FUNC__
1206 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1207 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1208 {
1209   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1210   int         ierr;
1211 
1212   PetscFunctionBegin;
1213   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1214   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNC__
1219 #define __FUNC__ "MatScale_MPIBAIJ"
1220 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1221 {
1222   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1223   int         ierr;
1224 
1225   PetscFunctionBegin;
1226   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1227   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNC__
1232 #define __FUNC__ "MatGetSize_MPIBAIJ"
1233 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1234 {
1235   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1236 
1237   PetscFunctionBegin;
1238   if (m) *m = mat->M;
1239   if (n) *n = mat->N;
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNC__
1244 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1245 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1246 {
1247   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1248 
1249   PetscFunctionBegin;
1250   *m = mat->m; *n = mat->n;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNC__
1255 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1256 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1257 {
1258   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1259 
1260   PetscFunctionBegin;
1261   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1262   PetscFunctionReturn(0);
1263 }
1264 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns one (point) row of the parallel matrix by merging the row pieces
   from the diagonal (A) and off-diagonal (B) blocks, sorted by increasing
   global column number.  Only locally owned rows may be requested.  idx and v
   may each be PETSC_NULL if that output is not wanted.  Workspace sized for
   the longest local row is allocated on first use and kept in
   mat->rowvalues/rowindices (freed by MatAssemblyEnd).
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* number of blocks in block-row i across both A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* a single buffer holds the values followed by the indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request from A/B only the outputs the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the A range */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested above, so imark must be computed here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1345 
1346 #undef __FUNC__
1347 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1348 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1349 {
1350   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1351 
1352   PetscFunctionBegin;
1353   if (baij->getrowactive == PETSC_FALSE) {
1354     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1355   }
1356   baij->getrowactive = PETSC_FALSE;
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNC__
1361 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1362 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1363 {
1364   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1365 
1366   PetscFunctionBegin;
1367   *bs = baij->bs;
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNC__
1372 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1373 int MatZeroEntries_MPIBAIJ(Mat A)
1374 {
1375   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1376   int         ierr;
1377 
1378   PetscFunctionBegin;
1379   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1380   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNC__
1385 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1386 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1387 {
1388   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1389   Mat         A = a->A, B = a->B;
1390   int         ierr;
1391   double      isend[5], irecv[5];
1392 
1393   PetscFunctionBegin;
1394   info->block_size     = (double)a->bs;
1395   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1396   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1397   isend[3] = info->memory;  isend[4] = info->mallocs;
1398   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1399   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1400   isend[3] += info->memory;  isend[4] += info->mallocs;
1401   if (flag == MAT_LOCAL) {
1402     info->nz_used      = isend[0];
1403     info->nz_allocated = isend[1];
1404     info->nz_unneeded  = isend[2];
1405     info->memory       = isend[3];
1406     info->mallocs      = isend[4];
1407   } else if (flag == MAT_GLOBAL_MAX) {
1408     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1409     info->nz_used      = irecv[0];
1410     info->nz_allocated = irecv[1];
1411     info->nz_unneeded  = irecv[2];
1412     info->memory       = irecv[3];
1413     info->mallocs      = irecv[4];
1414   } else if (flag == MAT_GLOBAL_SUM) {
1415     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1416     info->nz_used      = irecv[0];
1417     info->nz_allocated = irecv[1];
1418     info->nz_unneeded  = irecv[2];
1419     info->memory       = irecv[3];
1420     info->mallocs      = irecv[4];
1421   } else {
1422     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1423   }
1424   info->rows_global       = (double)a->M;
1425   info->columns_global    = (double)a->N;
1426   info->rows_local        = (double)a->m;
1427   info->columns_local     = (double)a->N;
1428   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1429   info->fill_ratio_needed = 0;
1430   info->factor_mallocs    = 0;
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNC__
1435 #define __FUNC__ "MatSetOption_MPIBAIJ"
1436 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1437 {
1438   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1439   int         ierr;
1440 
1441   PetscFunctionBegin;
1442   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1443       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1444       op == MAT_COLUMNS_UNSORTED ||
1445       op == MAT_COLUMNS_SORTED ||
1446       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1447       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1448         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1449         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1450   } else if (op == MAT_ROW_ORIENTED) {
1451         a->roworiented = 1;
1452         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1453         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1454   } else if (op == MAT_ROWS_SORTED ||
1455              op == MAT_ROWS_UNSORTED ||
1456              op == MAT_SYMMETRIC ||
1457              op == MAT_STRUCTURALLY_SYMMETRIC ||
1458              op == MAT_YES_NEW_DIAGONALS ||
1459              op == MAT_USE_HASH_TABLE) {
1460     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1461   } else if (op == MAT_COLUMN_ORIENTED) {
1462     a->roworiented = 0;
1463     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1464     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1465   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1466     a->donotstash = 1;
1467   } else if (op == MAT_NO_NEW_DIAGONALS) {
1468     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1469   } else if (op == MAT_USE_HASH_TABLE) {
1470     a->ht_flag = 1;
1471   } else {
1472     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1473   }
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNC__
1478 #define __FUNC__ "MatTranspose_MPIBAIJ("
1479 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1480 {
1481   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1482   Mat_SeqBAIJ *Aloc;
1483   Mat        B;
1484   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1485   int        bs=baij->bs,mbs=baij->mbs;
1486   Scalar     *a;
1487 
1488   PetscFunctionBegin;
1489   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1490   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1491 CHKERRQ(ierr);
1492 
1493   /* copy over the A part */
1494   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1495   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1496   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1497 
1498   for ( i=0; i<mbs; i++ ) {
1499     rvals[0] = bs*(baij->rstart + i);
1500     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1501     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1502       col = (baij->cstart+aj[j])*bs;
1503       for (k=0; k<bs; k++ ) {
1504         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1505         col++; a += bs;
1506       }
1507     }
1508   }
1509   /* copy over the B part */
1510   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1511   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1512   for ( i=0; i<mbs; i++ ) {
1513     rvals[0] = bs*(baij->rstart + i);
1514     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1515     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1516       col = baij->garray[aj[j]]*bs;
1517       for (k=0; k<bs; k++ ) {
1518         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1519         col++; a += bs;
1520       }
1521     }
1522   }
1523   ierr = PetscFree(rvals);CHKERRQ(ierr);
1524   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1525   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1526 
1527   if (matout != PETSC_NULL) {
1528     *matout = B;
1529   } else {
1530     PetscOps *Abops;
1531     MatOps   Aops;
1532 
1533     /* This isn't really an in-place transpose .... but free data structures from baij */
1534     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
1535     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1536     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1537 #if defined (PETSC_USE_CTABLE)
1538     if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
1539 #else
1540     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1541 #endif
1542     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1543     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1544     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1545     ierr = PetscFree(baij);CHKERRQ(ierr);
1546 
1547     /*
1548        This is horrible, horrible code. We need to keep the
1549       A pointers for the bops and ops but copy everything
1550       else from C.
1551     */
1552     Abops   = A->bops;
1553     Aops    = A->ops;
1554     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1555     A->bops = Abops;
1556     A->ops  = Aops;
1557 
1558     PetscHeaderDestroy(B);
1559   }
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 #undef __FUNC__
1564 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1565 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1566 {
1567   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1568   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1569   int ierr,s1,s2,s3;
1570 
1571   PetscFunctionBegin;
1572   if (ll)  {
1573     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1574     ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
1575     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1576     ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr);
1577     ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr);
1578   }
1579   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 #undef __FUNC__
1584 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1585 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1586 {
1587   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1588   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1589   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1590   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1591   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1592   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1593   MPI_Comm       comm = A->comm;
1594   MPI_Request    *send_waits,*recv_waits;
1595   MPI_Status     recv_status,*send_status;
1596   IS             istmp;
1597 
1598   PetscFunctionBegin;
1599   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1600   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1601 
1602   /*  first count number of contributors to each processor */
1603   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1604   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1605  procs   = nprocs + size;
1606   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1607   for ( i=0; i<N; i++ ) {
1608     idx = rows[i];
1609     found = 0;
1610     for ( j=0; j<size; j++ ) {
1611       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1612         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1613       }
1614     }
1615     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1616   }
1617   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1618 
1619   /* inform other processors of number of messages and max length*/
1620   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
1621   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1622   nrecvs = work[rank];
1623   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1624   nmax   = work[rank];
1625   ierr = PetscFree(work);CHKERRQ(ierr);
1626 
1627   /* post receives:   */
1628   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1629   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1630   for ( i=0; i<nrecvs; i++ ) {
1631     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1632   }
1633 
1634   /* do sends:
1635      1) starts[i] gives the starting index in svalues for stuff going to
1636      the ith processor
1637   */
1638   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1639   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1640   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
1641   starts[0] = 0;
1642   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1643   for ( i=0; i<N; i++ ) {
1644     svalues[starts[owner[i]]++] = rows[i];
1645   }
1646   ISRestoreIndices(is,&rows);
1647 
1648   starts[0] = 0;
1649   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1650   count = 0;
1651   for ( i=0; i<size; i++ ) {
1652     if (procs[i]) {
1653       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1654     }
1655   }
1656   ierr = PetscFree(starts);CHKERRQ(ierr);
1657 
1658   base = owners[rank]*bs;
1659 
1660   /*  wait on receives */
1661   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
1662   source = lens + nrecvs;
1663   count  = nrecvs; slen = 0;
1664   while (count) {
1665     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1666     /* unpack receives into our local space */
1667     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1668     source[imdex]  = recv_status.MPI_SOURCE;
1669     lens[imdex]  = n;
1670     slen += n;
1671     count--;
1672   }
1673   ierr = PetscFree(recv_waits); CHKERRQ(ierr);
1674 
1675   /* move the data into the send scatter */
1676   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
1677   count = 0;
1678   for ( i=0; i<nrecvs; i++ ) {
1679     values = rvalues + i*nmax;
1680     for ( j=0; j<lens[i]; j++ ) {
1681       lrows[count++] = values[j] - base;
1682     }
1683   }
1684   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1685   ierr = PetscFree(lens);CHKERRQ(ierr);
1686   ierr = PetscFree(owner);CHKERRQ(ierr);
1687   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1688 
1689   /* actually zap the local rows */
1690   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1691   PLogObjectParent(A,istmp);
1692 
1693   /*
1694         Zero the required rows. If the "diagonal block" of the matrix
1695      is square and the user wishes to set the diagonal we use seperate
1696      code so that MatSetValues() is not called for each diagonal allocating
1697      new memory, thus calling lots of mallocs and slowing things down.
1698 
1699        Contributed by: Mathew Knepley
1700   */
1701   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1702   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
1703   if (diag && (l->A->M == l->A->N)) {
1704     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
1705   } else if (diag) {
1706     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1707     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1708       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1709 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1710     }
1711     for ( i = 0; i < slen; i++ ) {
1712       row  = lrows[i] + rstart_bs;
1713       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1714     }
1715     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1716     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1717   } else {
1718     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1719   }
1720 
1721   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1722   ierr = PetscFree(lrows);CHKERRQ(ierr);
1723 
1724   /* wait on sends */
1725   if (nsends) {
1726     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1727     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1728     ierr = PetscFree(send_status);CHKERRQ(ierr);
1729   }
1730   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1731   ierr = PetscFree(svalues);CHKERRQ(ierr);
1732 
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNC__
1737 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1738 int MatPrintHelp_MPIBAIJ(Mat A)
1739 {
1740   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1741   MPI_Comm    comm = A->comm;
1742   static int  called = 0;
1743   int         ierr;
1744 
1745   PetscFunctionBegin;
1746   if (!a->rank) {
1747     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1748   }
1749   if (called) {PetscFunctionReturn(0);} else called = 1;
1750   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1751   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 #undef __FUNC__
1756 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1757 int MatSetUnfactored_MPIBAIJ(Mat A)
1758 {
1759   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1760   int         ierr;
1761 
1762   PetscFunctionBegin;
1763   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1764   PetscFunctionReturn(0);
1765 }
1766 
1767 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1768 
1769 #undef __FUNC__
1770 #define __FUNC__ "MatEqual_MPIBAIJ"
1771 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1772 {
1773   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1774   Mat         a, b, c, d;
1775   PetscTruth  flg;
1776   int         ierr;
1777 
1778   PetscFunctionBegin;
1779   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1780   a = matA->A; b = matA->B;
1781   c = matB->A; d = matB->B;
1782 
1783   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1784   if (flg == PETSC_TRUE) {
1785     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1786   }
1787   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1788   PetscFunctionReturn(0);
1789 }
1790 
/* -------------------------------------------------------------------*/
/*
    Function dispatch table for the MATMPIBAIJ matrix type.  The entries
  are strictly positional: each slot corresponds to a fixed operation in
  struct _MatOps, and a 0 marks an operation this type does not
  implement.  MatCreateMPIBAIJ() copies this table into each new
  matrix's operations table with PetscMemcpy(); do NOT reorder entries.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1859 
1860 
1861 EXTERN_C_BEGIN
1862 #undef __FUNC__
1863 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1864 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1865 {
1866   PetscFunctionBegin;
1867   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1868   *iscopy = PETSC_FALSE;
1869   PetscFunctionReturn(0);
1870 }
1871 EXTERN_C_END
1872 
1873 #undef __FUNC__
1874 #define __FUNC__ "MatCreateMPIBAIJ"
1875 /*@C
1876    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1877    (block compressed row).  For good matrix assembly performance
1878    the user should preallocate the matrix storage by setting the parameters
1879    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1880    performance can be increased by more than a factor of 50.
1881 
1882    Collective on MPI_Comm
1883 
1884    Input Parameters:
1885 +  comm - MPI communicator
.  bs   - size of block
1887 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1888            This value should be the same as the local size used in creating the
1889            y vector for the matrix-vector product y = Ax.
1890 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1891            This value should be the same as the local size used in creating the
1892            x vector for the matrix-vector product y = Ax.
1893 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1894 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1895 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1896            submatrix  (same for all local rows)
1897 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
1899            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1900 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1901            submatrix (same for all local rows).
1902 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1903            off-diagonal portion of the local submatrix (possibly different for
1904            each block row) or PETSC_NULL.
1905 
1906    Output Parameter:
1907 .  A - the matrix
1908 
1909    Options Database Keys:
1910 .   -mat_no_unroll - uses code that does not unroll the loops in the
1911                      block calculations (much slower)
1912 .   -mat_block_size - size of the blocks to use
1913 .   -mat_mpi - use the parallel matrix data structures even on one processor
1914                (defaults to using SeqBAIJ format on one processor)
1915 
1916    Notes:
1917    The user MUST specify either the local or global matrix dimensions
1918    (possibly both).
1919 
1920    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1922 
1923    Storage Information:
1924    For a square global matrix we define each processor's diagonal portion
1925    to be its local rows and the corresponding columns (a square submatrix);
1926    each processor's off-diagonal portion encompasses the remainder of the
1927    local matrix (a rectangular submatrix).
1928 
1929    The user can specify preallocated storage for the diagonal part of
1930    the local submatrix with either d_nz or d_nnz (not both).  Set
1931    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1932    memory allocation.  Likewise, specify preallocated storage for the
1933    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1934 
1935    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1936    the figure below we depict these three local rows and all columns (0-11).
1937 
1938 .vb
1939            0 1 2 3 4 5 6 7 8 9 10 11
1940           -------------------
1941    row 3  |  o o o d d d o o o o o o
1942    row 4  |  o o o d d d o o o o o o
1943    row 5  |  o o o d d d o o o o o o
1944           -------------------
1945 .ve
1946 
1947    Thus, any entries in the d locations are stored in the d (diagonal)
1948    submatrix, and any entries in the o locations are stored in the
1949    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1950    stored simply in the MATSEQBAIJ format for compressed row storage.
1951 
1952    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1953    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1954    In general, for PDE problems in which most nonzeros are near the diagonal,
1955    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1956    or you will get TERRIBLE performance; see the users' manual chapter on
1957    matrices.
1958 
1959    Level: intermediate
1960 
1961 .keywords: matrix, block, aij, compressed row, sparse, parallel
1962 
1963 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1964 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  /* the command line option -mat_block_size overrides the bs argument */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* on a single process default to the sequential BAIJ format, unless the
     parallel data structures were explicitly requested with -mat_mpibaij
     or -mat_mpi */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* validate the combination of local and global sizes supplied by the caller */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes take
     precedence; the global sizes are recomputed by summation below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* any missing global size is the sum of the local sizes over all processes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* any missing local size is computed by spreading the block rows (cols)
     as evenly as possible: the first (Mbs % size) processes get one extra */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* cache the sizes both in point units (m,n,M,N) and block units (mbs,...) */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* rowners[] becomes the prefix sum of the per-process block-row counts */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  /* same prefix-sum construction for the column ownership */
  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the local "diagonal" (m x n) and "off-diagonal" (m x N) blocks */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optionally use a hash table during assembly; -mat_use_hash_table may
     also supply a growth factor (values <= 1.0 are replaced by 1.39) */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the type-specific implementations of the composed operations */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2142 
2143 #undef __FUNC__
2144 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2145 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2146 {
2147   Mat         mat;
2148   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2149   int         ierr, len=0, flg;
2150 
2151   PetscFunctionBegin;
2152   *newmat       = 0;
2153   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2154   PLogObjectCreate(mat);
2155   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2156   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2157   mat->ops->destroy = MatDestroy_MPIBAIJ;
2158   mat->ops->view    = MatView_MPIBAIJ;
2159   mat->factor       = matin->factor;
2160   mat->assembled    = PETSC_TRUE;
2161   mat->insertmode   = NOT_SET_VALUES;
2162 
2163   a->m = mat->m   = oldmat->m;
2164   a->n = mat->n   = oldmat->n;
2165   a->M = mat->M   = oldmat->M;
2166   a->N = mat->N   = oldmat->N;
2167 
2168   a->bs  = oldmat->bs;
2169   a->bs2 = oldmat->bs2;
2170   a->mbs = oldmat->mbs;
2171   a->nbs = oldmat->nbs;
2172   a->Mbs = oldmat->Mbs;
2173   a->Nbs = oldmat->Nbs;
2174 
2175   a->rstart       = oldmat->rstart;
2176   a->rend         = oldmat->rend;
2177   a->cstart       = oldmat->cstart;
2178   a->cend         = oldmat->cend;
2179   a->size         = oldmat->size;
2180   a->rank         = oldmat->rank;
2181   a->donotstash   = oldmat->donotstash;
2182   a->roworiented  = oldmat->roworiented;
2183   a->rowindices   = 0;
2184   a->rowvalues    = 0;
2185   a->getrowactive = PETSC_FALSE;
2186   a->barray       = 0;
2187   a->rstart_bs    = oldmat->rstart_bs;
2188   a->rend_bs      = oldmat->rend_bs;
2189   a->cstart_bs    = oldmat->cstart_bs;
2190   a->cend_bs      = oldmat->cend_bs;
2191 
2192   /* hash table stuff */
2193   a->ht           = 0;
2194   a->hd           = 0;
2195   a->ht_size      = 0;
2196   a->ht_flag      = oldmat->ht_flag;
2197   a->ht_fact      = oldmat->ht_fact;
2198   a->ht_total_ct  = 0;
2199   a->ht_insert_ct = 0;
2200 
2201 
2202   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2203   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2204   a->cowners    = a->rowners + a->size + 2;
2205   a->rowners_bs = a->cowners + a->size + 2;
2206   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2207   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2208   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2209   if (oldmat->colmap) {
2210 #if defined (PETSC_USE_CTABLE)
2211   ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2212 #else
2213     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2214     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2215     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2216 #endif
2217   } else a->colmap = 0;
2218   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2219     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2220     PLogObjectMemory(mat,len*sizeof(int));
2221     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2222   } else a->garray = 0;
2223 
2224   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2225   PLogObjectParent(mat,a->lvec);
2226   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2227 
2228   PLogObjectParent(mat,a->Mvctx);
2229   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2230   PLogObjectParent(mat,a->A);
2231   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2232   PLogObjectParent(mat,a->B);
2233   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2234   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2235   if (flg) {
2236     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2237   }
2238   *newmat = mat;
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 #include "sys.h"
2243 
2244 #undef __FUNC__
2245 #define __FUNC__ "MatLoad_MPIBAIJ"
2246 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2247 {
2248   Mat          A;
2249   int          i, nz, ierr, j,rstart, rend, fd;
2250   Scalar       *vals,*buf;
2251   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2252   MPI_Status   status;
2253   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2254   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2255   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2256   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2257   int          dcount,kmax,k,nzcount,tmp;
2258 
2259   PetscFunctionBegin;
2260   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2261 
2262   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2263   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2264   if (!rank) {
2265     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2266     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2267     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2268     if (header[3] < 0) {
2269       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2270     }
2271   }
2272 
2273   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2274   M = header[1]; N = header[2];
2275 
2276   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2277 
2278   /*
2279      This code adds extra rows to make sure the number of rows is
2280      divisible by the blocksize
2281   */
2282   Mbs        = M/bs;
2283   extra_rows = bs - M + bs*(Mbs);
2284   if (extra_rows == bs) extra_rows = 0;
2285   else                  Mbs++;
2286   if (extra_rows &&!rank) {
2287     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2288   }
2289 
2290   /* determine ownership of all rows */
2291   mbs = Mbs/size + ((Mbs % size) > rank);
2292   m   = mbs * bs;
2293   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2294   browners = rowners + size + 1;
2295   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2296   rowners[0] = 0;
2297   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2298   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2299   rstart = rowners[rank];
2300   rend   = rowners[rank+1];
2301 
2302   /* distribute row lengths to all processors */
2303   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2304   if (!rank) {
2305     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2306     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2307     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2308     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2309     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2310     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2311     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2312   } else {
2313     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2314   }
2315 
2316   if (!rank) {
2317     /* calculate the number of nonzeros on each processor */
2318     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2319     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2320     for ( i=0; i<size; i++ ) {
2321       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2322         procsnz[i] += rowlengths[j];
2323       }
2324     }
2325     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2326 
2327     /* determine max buffer needed and allocate it */
2328     maxnz = 0;
2329     for ( i=0; i<size; i++ ) {
2330       maxnz = PetscMax(maxnz,procsnz[i]);
2331     }
2332     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2333 
2334     /* read in my part of the matrix column indices  */
2335     nz = procsnz[0];
2336     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2337     mycols = ibuf;
2338     if (size == 1)  nz -= extra_rows;
2339     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2340     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2341 
2342     /* read in every ones (except the last) and ship off */
2343     for ( i=1; i<size-1; i++ ) {
2344       nz   = procsnz[i];
2345       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2346       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2347     }
2348     /* read in the stuff for the last proc */
2349     if ( size != 1 ) {
2350       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2351       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2352       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2353       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2354     }
2355     ierr = PetscFree(cols);CHKERRQ(ierr);
2356   } else {
2357     /* determine buffer space needed for message */
2358     nz = 0;
2359     for ( i=0; i<m; i++ ) {
2360       nz += locrowlens[i];
2361     }
2362     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2363     mycols = ibuf;
2364     /* receive message of column indices*/
2365     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2366     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2367     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2368   }
2369 
2370   /* loop over local rows, determining number of off diagonal entries */
2371   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2372   odlens = dlens + (rend-rstart);
2373   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2374   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2375   masked1 = mask    + Mbs;
2376   masked2 = masked1 + Mbs;
2377   rowcount = 0; nzcount = 0;
2378   for ( i=0; i<mbs; i++ ) {
2379     dcount  = 0;
2380     odcount = 0;
2381     for ( j=0; j<bs; j++ ) {
2382       kmax = locrowlens[rowcount];
2383       for ( k=0; k<kmax; k++ ) {
2384         tmp = mycols[nzcount++]/bs;
2385         if (!mask[tmp]) {
2386           mask[tmp] = 1;
2387           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2388           else masked1[dcount++] = tmp;
2389         }
2390       }
2391       rowcount++;
2392     }
2393 
2394     dlens[i]  = dcount;
2395     odlens[i] = odcount;
2396 
2397     /* zero out the mask elements we set */
2398     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2399     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2400   }
2401 
2402   /* create our matrix */
2403   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2404   A = *newmat;
2405   MatSetOption(A,MAT_COLUMNS_SORTED);
2406 
2407   if (!rank) {
2408     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2409     /* read in my part of the matrix numerical values  */
2410     nz = procsnz[0];
2411     vals = buf;
2412     mycols = ibuf;
2413     if (size == 1)  nz -= extra_rows;
2414     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2415     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2416 
2417     /* insert into matrix */
2418     jj      = rstart*bs;
2419     for ( i=0; i<m; i++ ) {
2420       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2421       mycols += locrowlens[i];
2422       vals   += locrowlens[i];
2423       jj++;
2424     }
2425     /* read in other processors (except the last one) and ship out */
2426     for ( i=1; i<size-1; i++ ) {
2427       nz   = procsnz[i];
2428       vals = buf;
2429       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2430       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2431     }
2432     /* the last proc */
2433     if ( size != 1 ){
2434       nz   = procsnz[i] - extra_rows;
2435       vals = buf;
2436       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2437       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2438       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2439     }
2440     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2441   } else {
2442     /* receive numeric values */
2443     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2444 
2445     /* receive message of values*/
2446     vals   = buf;
2447     mycols = ibuf;
2448     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2449     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2450     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2451 
2452     /* insert into matrix */
2453     jj      = rstart*bs;
2454     for ( i=0; i<m; i++ ) {
2455       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2456       mycols += locrowlens[i];
2457       vals   += locrowlens[i];
2458       jj++;
2459     }
2460   }
2461   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2462   ierr = PetscFree(buf);CHKERRQ(ierr);
2463   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2464   ierr = PetscFree(rowners);CHKERRQ(ierr);
2465   ierr = PetscFree(dlens);CHKERRQ(ierr);
2466   ierr = PetscFree(mask);CHKERRQ(ierr);
2467   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2468   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 
2473 
2474 #undef __FUNC__
2475 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2476 /*@
2477    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2478 
   Input Parameters:
+  mat  - the matrix
-  fact - factor used to compute the size of the hash table
2482 
2483    Collective on Mat
2484 
2485    Level: advanced
2486 
2487   Notes:
2488    This can also be set by the command line option: -mat_use_hash_table fact
2489 
2490 .keywords: matrix, hashtable, factor, HT
2491 
2492 .seealso: MatSetOption()
2493 @*/
2494 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2495 {
2496   Mat_MPIBAIJ *baij;
2497 
2498   PetscFunctionBegin;
2499   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2500   if (mat->type != MATMPIBAIJ) {
2501       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2502   }
2503   baij = (Mat_MPIBAIJ*) mat->data;
2504   baij->ht_fact = fact;
2505   PetscFunctionReturn(0);
2506 }
2507