xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 861218d8f08bbcfd0745d7575957ddfa2bc5d1be)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.165 1999/03/19 21:19:45 bsmith Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
48 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length (in block columns).
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs; /* nbs = block columns actually present in B */

#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* hash-table variant: stores only the nbs entries present; key and value
     are both shifted by +1 so that 0 can serve as "not found" */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: one entry per global block column (Nbs total);
     entry is 1 + first local point column of the block, 0 if absent */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
80 
#define CHUNKSIZE  10   /* number of extra block entries of space added per row reallocation */

/*
   Inserts (or adds, depending on addv) a single scalar value into the
   "diagonal" local block A.  Expanded inline rather than calling
   MatSetValues_SeqBAIJ() for speed.  Relies on locals declared by the
   caller: rp,ap,rmax,nrow,low,high,t,_i,ii,N,brow,bcol,ridx,cidx,bap,
   plus a,aa,ai,aj,aimax,ailen,bs,bs2,A.  Uses a binary search to narrow
   the row, then a linear scan; if the row is full it reallocates the
   entire matrix storage enlarged by CHUNKSIZE entries (unless a->nonew
   forbids new nonzeros).
   NOTE(review): a->nz++ occurs only inside the reallocation branch, not
   on every new insertion -- looks suspicious; confirm against the
   SeqBAIJ insertion routine before relying on a->nz.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
157 
/*
   Same insertion logic as MatSetValues_SeqBAIJ_A_Private, but for the
   "off-diagonal" local block B; operates on the caller-declared locals
   b,ba,bi,bj,bimax,bilen (and B) instead of the A-part variables.
   See the notes on the A variant above.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
232 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   Inserts or adds a logically dense m-by-n array of values, indexed by
   global POINT (not block) rows/columns.  Locally owned entries go
   directly into the diagonal (A) or off-diagonal (B) sequential
   matrices via the inlined MatSetValues_SeqBAIJ_{A,B}_Private()
   macros; entries owned by other processes are stashed for the
   assembly-time communication (unless donotstash is set).  Negative
   row/column indices are silently ignored.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;  /* negative rows are silently skipped */
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {  /* row owned by this process */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* column in diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {  /* column belongs to the off-diagonal block B */
          if (mat->was_assembled) {
            /* after assembly B is compactified: translate the global column
               through colmap (built lazily) to B's local numbering */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not previously present: revert to unassembled form */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* row owned by another process: stash for assembly-time send */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
315 
316 #undef __FUNC__
317 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
318 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
319 {
320   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
321   Scalar      *value,*barray=baij->barray;
322   int         ierr,i,j,ii,jj,row,col;
323   int         roworiented = baij->roworiented,rstart=baij->rstart ;
324   int         rend=baij->rend,cstart=baij->cstart,stepval;
325   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
326 
327   if(!barray) {
328     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
329   }
330 
331   if (roworiented) {
332     stepval = (n-1)*bs;
333   } else {
334     stepval = (m-1)*bs;
335   }
336   for ( i=0; i<m; i++ ) {
337     if (im[i] < 0) continue;
338 #if defined(USE_PETSC_BOPT_g)
339     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
340 #endif
341     if (im[i] >= rstart && im[i] < rend) {
342       row = im[i] - rstart;
343       for ( j=0; j<n; j++ ) {
344         /* If NumCol = 1 then a copy is not required */
345         if ((roworiented) && (n == 1)) {
346           barray = v + i*bs2;
347         } else if((!roworiented) && (m == 1)) {
348           barray = v + j*bs2;
349         } else { /* Here a copy is required */
350           if (roworiented) {
351             value = v + i*(stepval+bs)*bs + j*bs;
352           } else {
353             value = v + j*(stepval+bs)*bs + i*bs;
354           }
355           for ( ii=0; ii<bs; ii++,value+=stepval ) {
356             for (jj=0; jj<bs; jj++ ) {
357               *barray++  = *value++;
358             }
359           }
360           barray -=bs2;
361         }
362 
363         if (in[j] >= cstart && in[j] < cend){
364           col  = in[j] - cstart;
365           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
366         }
367         else if (in[j] < 0) continue;
368 #if defined(USE_PETSC_BOPT_g)
369         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
370 #endif
371         else {
372           if (mat->was_assembled) {
373             if (!baij->colmap) {
374               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
375             }
376 
377 #if defined(USE_PETSC_BOPT_g)
378 #if defined (USE_CTABLE)
379             { int data;
380             ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
381             if((data - 1) % bs)
382 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
383             }
384 #else
385             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
386 #endif
387 #endif
388 #if defined (USE_CTABLE)
389 	    ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
390             col  = (col - 1)/bs;
391 #else
392             col = (baij->colmap[in[j]] - 1)/bs;
393 #endif
394             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
395               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
396               col =  in[j];
397             }
398           }
399           else col = in[j];
400           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
401         }
402       }
403     } else {
404       if (!baij->donotstash) {
405         if (roworiented) {
406           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
407         } else {
408           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
409         }
410       }
411     }
412   }
413   PetscFunctionReturn(0);
414 }
#define HASH_KEY 0.6180339887  /* golden-ratio fraction used for multiplicative hashing */
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* maps key into [0,size); tmp must be a double lvalue supplied by the caller */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValues: every locally owned (row,col)
   must already have an entry in the hash table built by
   MatCreateHashTable_MPIBAIJ_Private(); the value is written directly
   into the block of values that the table entry points to (it is an
   error if no entry exists).  Off-process rows are stashed exactly as
   in MatSetValues_MPIBAIJ().
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table; keys are 1-based block coordinates */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* linear probing with wrap-around, counting probes for statistics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probing with wrap-around */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
495 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValuesBlocked: indices are global BLOCK
   rows/columns, and each locally owned block must already have an entry
   in the hash table; the bs*bs block of values is copied (or added)
   directly into the storage the table entry points to.  Off-process
   block rows are stashed in the block stash.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance (in Scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table; keys are 1-based block coordinates */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* linear probing with wrap-around, counting probes for statistics */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probing with wrap-around */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
609 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves an m-by-n array of values indexed by global POINT
   rows/columns.  Only locally owned rows are supported; columns in the
   diagonal range come from A, others are translated through colmap and
   read from B (entries not stored locally are returned as 0.0).
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate through the (lazily built) colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block not stored locally (or stale colmap entry): the value is 0 */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
653 
654 #undef __FUNC__
655 #define __FUNC__ "MatNorm_MPIBAIJ"
656 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
657 {
658   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
659   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
660   int        ierr, i,bs2=baij->bs2;
661   double     sum = 0.0;
662   Scalar     *v;
663 
664   PetscFunctionBegin;
665   if (baij->size == 1) {
666     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
667   } else {
668     if (type == NORM_FROBENIUS) {
669       v = amat->a;
670       for (i=0; i<amat->nz*bs2; i++ ) {
671 #if defined(USE_PETSC_COMPLEX)
672         sum += PetscReal(PetscConj(*v)*(*v)); v++;
673 #else
674         sum += (*v)*(*v); v++;
675 #endif
676       }
677       v = bmat->a;
678       for (i=0; i<bmat->nz*bs2; i++ ) {
679 #if defined(USE_PETSC_COMPLEX)
680         sum += PetscReal(PetscConj(*v)*(*v)); v++;
681 #else
682         sum += (*v)*(*v); v++;
683 #endif
684       }
685       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
686       *norm = sqrt(*norm);
687     } else {
688       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
689     }
690   }
691   PetscFunctionReturn(0);
692 }
693 
694 
695 /*
696   Creates the hash table, and sets the table
697   This table is created only once.
698   If new entried need to be added to the matrix
699   then the hash table has to be destroyed and
700   recreated.
701 */
702 #undef __FUNC__
703 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
704 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
705 {
706   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
707   Mat         A = baij->A, B=baij->B;
708   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
709   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
710   int         size,bs2=baij->bs2,rstart=baij->rstart;
711   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
712   int         *HT,key;
713   Scalar      **HD;
714   double      tmp;
715 #if defined(USE_PETSC_BOPT_g)
716   int         ct=0,max=0;
717 #endif
718 
719   PetscFunctionBegin;
720   baij->ht_size=(int)(factor*nz);
721   size = baij->ht_size;
722 
723   if (baij->ht) {
724     PetscFunctionReturn(0);
725   }
726 
727   /* Allocate Memory for Hash Table */
728   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
729   baij->ht = (int*)(baij->hd + size);
730   HD = baij->hd;
731   HT = baij->ht;
732 
733 
734   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
735 
736 
737   /* Loop Over A */
738   for ( i=0; i<a->mbs; i++ ) {
739     for ( j=ai[i]; j<ai[i+1]; j++ ) {
740       row = i+rstart;
741       col = aj[j]+cstart;
742 
743       key = row*Nbs + col + 1;
744       h1  = HASH(size,key,tmp);
745       for ( k=0; k<size; k++ ){
746         if (HT[(h1+k)%size] == 0.0) {
747           HT[(h1+k)%size] = key;
748           HD[(h1+k)%size] = a->a + j*bs2;
749           break;
750 #if defined(USE_PETSC_BOPT_g)
751         } else {
752           ct++;
753 #endif
754         }
755       }
756 #if defined(USE_PETSC_BOPT_g)
757       if (k> max) max = k;
758 #endif
759     }
760   }
761   /* Loop Over B */
762   for ( i=0; i<b->mbs; i++ ) {
763     for ( j=bi[i]; j<bi[i+1]; j++ ) {
764       row = i+rstart;
765       col = garray[bj[j]];
766       key = row*Nbs + col + 1;
767       h1  = HASH(size,key,tmp);
768       for ( k=0; k<size; k++ ){
769         if (HT[(h1+k)%size] == 0.0) {
770           HT[(h1+k)%size] = key;
771           HD[(h1+k)%size] = b->a + j*bs2;
772           break;
773 #if defined(USE_PETSC_BOPT_g)
774         } else {
775           ct++;
776 #endif
777         }
778       }
779 #if defined(USE_PETSC_BOPT_g)
780       if (k> max) max = k;
781 #endif
782     }
783   }
784 
785   /* Print Summary */
786 #if defined(USE_PETSC_BOPT_g)
787   for ( i=0,j=0; i<size; i++)
788     if (HT[i]) {j++;}
789   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
790            (j== 0)? 0.0:((double)(ct+j))/j,max);
791 #endif
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNC__
796 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
797 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
800   int         ierr,nstash,reallocs;
801   InsertMode  addv;
802 
803   PetscFunctionBegin;
804   if (baij->donotstash) {
805     PetscFunctionReturn(0);
806   }
807 
808   /* make sure all processors are either in INSERTMODE or ADDMODE */
809   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
810   if (addv == (ADD_VALUES|INSERT_VALUES)) {
811     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
812   }
813   mat->insertmode = addv; /* in case this processor had no cache */
814 
815   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs); CHKERRQ(ierr);
816   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners); CHKERRQ(ierr);
817   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
818   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
819   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
820   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
821   PetscFunctionReturn(0);
822 }
823 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes assembly: receives and applies all values other processors
   stashed for locally owned rows (point stash first, then block stash),
   assembles the local diagonal (A) and off-diagonal (B) submatrices,
   coordinates disassembly across processors, and optionally builds the
   hash table used by the _HT MatSetValues variants.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each received message holds (row,col,val)
       entries with equal rows stored consecutively */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block is bs2 scalars, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash); CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the scatter used by MatMult et al. */
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    /* report and reset hash-table search statistics (debug builds only) */
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* build the hash table and switch to the hash-table SetValues routines */
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* the MatGetRow() work array is stale once values change; free it */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
919 
920 #undef __FUNC__
921 #define __FUNC__ "MatView_MPIBAIJ_Binary"
922 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
923 {
924   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
925   int          ierr;
926 
927   PetscFunctionBegin;
928   if (baij->size == 1) {
929     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
930   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNC__
935 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
936 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
937 {
938   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
939   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
940   FILE         *fd;
941   ViewerType   vtype;
942 
943   PetscFunctionBegin;
944   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
945   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
946     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
947     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
948       MatInfo info;
949       MPI_Comm_rank(mat->comm,&rank);
950       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
951       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
952       PetscSequentialPhaseBegin(mat->comm,1);
953       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
954               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
955               baij->bs,(int)info.memory);
956       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
957       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
958       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
959       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
960       fflush(fd);
961       PetscSequentialPhaseEnd(mat->comm,1);
962       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
963       PetscFunctionReturn(0);
964     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
965       PetscPrintf(mat->comm,"  block size is %d\n",bs);
966       PetscFunctionReturn(0);
967     }
968   }
969 
970   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
971     Draw       draw;
972     PetscTruth isnull;
973     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
974     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
975   }
976 
977   if (size == 1) {
978     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
979   } else {
980     /* assemble the entire matrix onto first processor. */
981     Mat         A;
982     Mat_SeqBAIJ *Aloc;
983     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
984     int         mbs = baij->mbs;
985     Scalar      *a;
986 
987     if (!rank) {
988       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
989     } else {
990       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
991     }
992     PLogObjectParent(mat,A);
993 
994     /* copy over the A part */
995     Aloc = (Mat_SeqBAIJ*) baij->A->data;
996     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
997     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
998 
999     for ( i=0; i<mbs; i++ ) {
1000       rvals[0] = bs*(baij->rstart + i);
1001       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1002       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1003         col = (baij->cstart+aj[j])*bs;
1004         for (k=0; k<bs; k++ ) {
1005           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1006           col++; a += bs;
1007         }
1008       }
1009     }
1010     /* copy over the B part */
1011     Aloc = (Mat_SeqBAIJ*) baij->B->data;
1012     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1013     for ( i=0; i<mbs; i++ ) {
1014       rvals[0] = bs*(baij->rstart + i);
1015       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1016       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1017         col = baij->garray[aj[j]]*bs;
1018         for (k=0; k<bs; k++ ) {
1019           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1020           col++; a += bs;
1021         }
1022       }
1023     }
1024     PetscFree(rvals);
1025     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1026     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1027     /*
1028        Everyone has to call to draw the matrix since the graphics waits are
1029        synchronized across all processors that share the Draw object
1030     */
1031     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
1032       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1033     }
1034     ierr = MatDestroy(A); CHKERRQ(ierr);
1035   }
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 
1040 
1041 #undef __FUNC__
1042 #define __FUNC__ "MatView_MPIBAIJ"
1043 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1044 {
1045   int         ierr;
1046   ViewerType  vtype;
1047 
1048   PetscFunctionBegin;
1049   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1050   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1051       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
1052     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
1053   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1054     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1055   } else {
1056     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   Destroys the parallel matrix: releases the local submatrices A and B,
   the communication structures, and all work arrays.  If other
   references remain, only the reference count is decremented.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  /* both the point stash and the block stash own buffers to release */
  ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash); CHKERRQ(ierr);

  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  /* colmap: global-to-local column translation, table or array form
     depending on the build configuration */
#if defined (USE_CTABLE)
  if (baij->colmap) TableDelete(baij->colmap);
#else
  if (baij->colmap) PetscFree(baij->colmap);
#endif
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1109 
1110 #undef __FUNC__
1111 #define __FUNC__ "MatMult_MPIBAIJ"
1112 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1113 {
1114   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1115   int         ierr, nt;
1116 
1117   PetscFunctionBegin;
1118   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1119   if (nt != a->n) {
1120     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1121   }
1122   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1123   if (nt != a->m) {
1124     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1125   }
1126   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1127   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1128   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1129   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1130   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNC__
1135 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1136 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1137 {
1138   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1139   int        ierr;
1140 
1141   PetscFunctionBegin;
1142   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1143   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1144   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1145   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNC__
1150 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1151 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1152 {
1153   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1154   int         ierr;
1155 
1156   PetscFunctionBegin;
1157   /* do nondiagonal part */
1158   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1159   /* send it on its way */
1160   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1161   /* do local part */
1162   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1163   /* receive remote parts: note this assumes the values are not actually */
1164   /* inserted in yy until the next line, which is true for my implementation*/
1165   /* but is not perhaps always true. */
1166   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNC__
1171 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1172 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1173 {
1174   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1175   int         ierr;
1176 
1177   PetscFunctionBegin;
1178   /* do nondiagonal part */
1179   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1180   /* send it on its way */
1181   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1182   /* do local part */
1183   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1184   /* receive remote parts: note this assumes the values are not actually */
1185   /* inserted in yy until the next line, which is true for my implementation*/
1186   /* but is not perhaps always true. */
1187   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 /*
1192   This only works correctly for square matrices where the subblock A->A is the
1193    diagonal block
1194 */
1195 #undef __FUNC__
1196 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1197 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1198 {
1199   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1200   int         ierr;
1201 
1202   PetscFunctionBegin;
1203   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1204   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNC__
1209 #define __FUNC__ "MatScale_MPIBAIJ"
1210 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1211 {
1212   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1213   int         ierr;
1214 
1215   PetscFunctionBegin;
1216   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1217   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 #undef __FUNC__
1222 #define __FUNC__ "MatGetSize_MPIBAIJ"
1223 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1224 {
1225   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1226 
1227   PetscFunctionBegin;
1228   if (m) *m = mat->M;
1229   if (n) *n = mat->N;
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNC__
1234 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1235 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1236 {
1237   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1238 
1239   PetscFunctionBegin;
1240   *m = mat->m; *n = mat->n;
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNC__
1245 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1246 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1247 {
1248   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1249 
1250   PetscFunctionBegin;
1251   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1252   PetscFunctionReturn(0);
1253 }
1254 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns one locally owned row (in global point numbering, columns
   sorted increasingly), merged from the diagonal (A) and off-diagonal
   (B) submatrices.  Only one row may be checked out at a time; it must
   be released with MatRestoreRow_MPIBAIJ().  The returned idx/v arrays
   point into a work buffer owned by the matrix.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* longest combined block row across A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both values and (after them) the indices */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* null out the value/index requests the caller did not make */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap translates local off-diagonal block columns to global block columns */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column before the diagonal block come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all A (diagonal block) entries, then the remaining B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values not requested: find the split point now */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1336 
1337 #undef __FUNC__
1338 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1339 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1340 {
1341   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1342 
1343   PetscFunctionBegin;
1344   if (baij->getrowactive == PETSC_FALSE) {
1345     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1346   }
1347   baij->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNC__
1352 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1353 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1354 {
1355   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1356 
1357   PetscFunctionBegin;
1358   *bs = baij->bs;
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNC__
1363 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1364 int MatZeroEntries_MPIBAIJ(Mat A)
1365 {
1366   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1367   int         ierr;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1371   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNC__
1376 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1377 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1378 {
1379   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1380   Mat         A = a->A, B = a->B;
1381   int         ierr;
1382   double      isend[5], irecv[5];
1383 
1384   PetscFunctionBegin;
1385   info->block_size     = (double)a->bs;
1386   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1387   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1388   isend[3] = info->memory;  isend[4] = info->mallocs;
1389   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1390   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1391   isend[3] += info->memory;  isend[4] += info->mallocs;
1392   if (flag == MAT_LOCAL) {
1393     info->nz_used      = isend[0];
1394     info->nz_allocated = isend[1];
1395     info->nz_unneeded  = isend[2];
1396     info->memory       = isend[3];
1397     info->mallocs      = isend[4];
1398   } else if (flag == MAT_GLOBAL_MAX) {
1399     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1400     info->nz_used      = irecv[0];
1401     info->nz_allocated = irecv[1];
1402     info->nz_unneeded  = irecv[2];
1403     info->memory       = irecv[3];
1404     info->mallocs      = irecv[4];
1405   } else if (flag == MAT_GLOBAL_SUM) {
1406     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1407     info->nz_used      = irecv[0];
1408     info->nz_allocated = irecv[1];
1409     info->nz_unneeded  = irecv[2];
1410     info->memory       = irecv[3];
1411     info->mallocs      = irecv[4];
1412   } else {
1413     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1414   }
1415   info->rows_global       = (double)a->M;
1416   info->columns_global    = (double)a->N;
1417   info->rows_local        = (double)a->m;
1418   info->columns_local     = (double)a->N;
1419   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1420   info->fill_ratio_needed = 0;
1421   info->factor_mallocs    = 0;
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNC__
1426 #define __FUNC__ "MatSetOption_MPIBAIJ"
1427 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1428 {
1429   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1430   int         ierr;
1431 
1432   PetscFunctionBegin;
1433   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1434       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1435       op == MAT_COLUMNS_UNSORTED ||
1436       op == MAT_COLUMNS_SORTED ||
1437       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1438       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1439         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1440         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1441   } else if (op == MAT_ROW_ORIENTED) {
1442         a->roworiented = 1;
1443         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1444         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1445   } else if (op == MAT_ROWS_SORTED ||
1446              op == MAT_ROWS_UNSORTED ||
1447              op == MAT_SYMMETRIC ||
1448              op == MAT_STRUCTURALLY_SYMMETRIC ||
1449              op == MAT_YES_NEW_DIAGONALS ||
1450              op == MAT_USE_HASH_TABLE) {
1451     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1452   } else if (op == MAT_COLUMN_ORIENTED) {
1453     a->roworiented = 0;
1454     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1455     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1456   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1457     a->donotstash = 1;
1458   } else if (op == MAT_NO_NEW_DIAGONALS) {
1459     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1460   } else if (op == MAT_USE_HASH_TABLE) {
1461     a->ht_flag = 1;
1462   } else {
1463     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNC__
1469 #define __FUNC__ "MatTranspose_MPIBAIJ("
1470 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1471 {
1472   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1473   Mat_SeqBAIJ *Aloc;
1474   Mat        B;
1475   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1476   int        bs=baij->bs,mbs=baij->mbs;
1477   Scalar     *a;
1478 
1479   PetscFunctionBegin;
1480   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1481   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1482   CHKERRQ(ierr);
1483 
1484   /* copy over the A part */
1485   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1486   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1487   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1488 
1489   for ( i=0; i<mbs; i++ ) {
1490     rvals[0] = bs*(baij->rstart + i);
1491     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1492     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1493       col = (baij->cstart+aj[j])*bs;
1494       for (k=0; k<bs; k++ ) {
1495         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1496         col++; a += bs;
1497       }
1498     }
1499   }
1500   /* copy over the B part */
1501   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1502   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1503   for ( i=0; i<mbs; i++ ) {
1504     rvals[0] = bs*(baij->rstart + i);
1505     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1506     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1507       col = baij->garray[aj[j]]*bs;
1508       for (k=0; k<bs; k++ ) {
1509         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1510         col++; a += bs;
1511       }
1512     }
1513   }
1514   PetscFree(rvals);
1515   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1516   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1517 
1518   if (matout != PETSC_NULL) {
1519     *matout = B;
1520   } else {
1521     PetscOps *Abops;
1522     MatOps   Aops;
1523 
1524     /* This isn't really an in-place transpose .... but free data structures from baij */
1525     PetscFree(baij->rowners);
1526     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1527     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1528 #if defined (USE_CTABLE)
1529     if (baij->colmap) TableDelete(baij->colmap);
1530 #else
1531     if (baij->colmap) PetscFree(baij->colmap);
1532 #endif
1533     if (baij->garray) PetscFree(baij->garray);
1534     if (baij->lvec) VecDestroy(baij->lvec);
1535     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1536     PetscFree(baij);
1537 
1538     /*
1539        This is horrible, horrible code. We need to keep the
1540       A pointers for the bops and ops but copy everything
1541       else from C.
1542     */
1543     Abops = A->bops;
1544     Aops  = A->ops;
1545     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1546     A->bops = Abops;
1547     A->ops  = Aops;
1548 
1549     PetscHeaderDestroy(B);
1550   }
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNC__
1555 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1556 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1557 {
1558   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1559   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1560   int ierr,s1,s2,s3;
1561 
1562   PetscFunctionBegin;
1563   if (ll)  {
1564     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1565     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1566     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1567     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1568     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1569   }
1570   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNC__
1575 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1576 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1577 {
1578   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1579   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1580   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1581   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1582   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1583   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1584   MPI_Comm       comm = A->comm;
1585   MPI_Request    *send_waits,*recv_waits;
1586   MPI_Status     recv_status,*send_status;
1587   IS             istmp;
1588 
1589   PetscFunctionBegin;
1590   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1591   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1592 
1593   /*  first count number of contributors to each processor */
1594   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1595   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1596   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1597   for ( i=0; i<N; i++ ) {
1598     idx = rows[i];
1599     found = 0;
1600     for ( j=0; j<size; j++ ) {
1601       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1602         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1603       }
1604     }
1605     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1606   }
1607   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1608 
1609   /* inform other processors of number of messages and max length*/
1610   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1611   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1612   nrecvs = work[rank];
1613   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1614   nmax   = work[rank];
1615   PetscFree(work);
1616 
1617   /* post receives:   */
1618   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1619   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1620   for ( i=0; i<nrecvs; i++ ) {
1621     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1622   }
1623 
1624   /* do sends:
1625      1) starts[i] gives the starting index in svalues for stuff going to
1626      the ith processor
1627   */
1628   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1629   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1630   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1631   starts[0] = 0;
1632   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1633   for ( i=0; i<N; i++ ) {
1634     svalues[starts[owner[i]]++] = rows[i];
1635   }
1636   ISRestoreIndices(is,&rows);
1637 
1638   starts[0] = 0;
1639   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1640   count = 0;
1641   for ( i=0; i<size; i++ ) {
1642     if (procs[i]) {
1643       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1644     }
1645   }
1646   PetscFree(starts);
1647 
1648   base = owners[rank]*bs;
1649 
1650   /*  wait on receives */
1651   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1652   source = lens + nrecvs;
1653   count  = nrecvs; slen = 0;
1654   while (count) {
1655     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1656     /* unpack receives into our local space */
1657     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1658     source[imdex]  = recv_status.MPI_SOURCE;
1659     lens[imdex]  = n;
1660     slen += n;
1661     count--;
1662   }
1663   PetscFree(recv_waits);
1664 
1665   /* move the data into the send scatter */
1666   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1667   count = 0;
1668   for ( i=0; i<nrecvs; i++ ) {
1669     values = rvalues + i*nmax;
1670     for ( j=0; j<lens[i]; j++ ) {
1671       lrows[count++] = values[j] - base;
1672     }
1673   }
1674   PetscFree(rvalues); PetscFree(lens);
1675   PetscFree(owner); PetscFree(nprocs);
1676 
1677   /* actually zap the local rows */
1678   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1679   PLogObjectParent(A,istmp);
1680 
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
1689   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1690   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1691   if (diag && (l->A->M == l->A->N)) {
1692     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1693   } else if (diag) {
1694     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1695     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1696       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1697 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1698     }
1699     for ( i = 0; i < slen; i++ ) {
1700       row  = lrows[i] + rstart_bs;
1701       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1702     }
1703     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1704     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1705   } else {
1706     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1707   }
1708 
1709   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1710   PetscFree(lrows);
1711 
1712   /* wait on sends */
1713   if (nsends) {
1714     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1715     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1716     PetscFree(send_status);
1717   }
1718   PetscFree(send_waits); PetscFree(svalues);
1719 
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNC__
1724 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1725 int MatPrintHelp_MPIBAIJ(Mat A)
1726 {
1727   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1728   MPI_Comm    comm = A->comm;
1729   static int  called = 0;
1730   int         ierr;
1731 
1732   PetscFunctionBegin;
1733   if (!a->rank) {
1734     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1735   }
1736   if (called) {PetscFunctionReturn(0);} else called = 1;
1737   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1738   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNC__
1743 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
/*
   MatSetUnfactored_MPIBAIJ - Clears the factored state of the matrix by
   resetting the local diagonal block.  The off-diagonal block is not
   touched here; presumably it never carries factorization state -- TODO
   confirm against MatSetUnfactored() for SeqBAIJ.
*/
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);   /* delegate to the sequential diagonal block */
  PetscFunctionReturn(0);
}
1753 
1754 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1755 
1756 #undef __FUNC__
1757 #define __FUNC__ "MatEqual_MPIBAIJ"
1758 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1759 {
1760   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1761   Mat         a, b, c, d;
1762   PetscTruth  flg;
1763   int         ierr;
1764 
1765   PetscFunctionBegin;
1766   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1767   a = matA->A; b = matA->B;
1768   c = matB->A; d = matB->B;
1769 
1770   ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
1771   if (flg == PETSC_TRUE) {
1772     ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
1773   }
1774   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 /* -------------------------------------------------------------------*/
/*
   Function table for the MPIBAIJ matrix type.  The entries are POSITIONAL:
   each slot corresponds to a fixed operation in struct _MatOps (declared
   elsewhere), and a 0 marks an operation this format does not implement.
   Do NOT reorder or insert entries here without consulting the struct
   definition.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1846 
1847 
1848 EXTERN_C_BEGIN
1849 #undef __FUNC__
1850 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
/*
   MatGetDiagonalBlock_MPIBAIJ - Hands back the sequential BAIJ matrix that
   stores this process's "diagonal block" (local rows and the matching
   columns).  No copy is made, so *iscopy is set to PETSC_FALSE and the
   reuse argument is not consulted; the caller must not destroy the
   returned matrix.
*/
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;   /* a reference, not a duplicate */
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1858 EXTERN_C_END
1859 
1860 #undef __FUNC__
1861 #define __FUNC__ "MatCreateMPIBAIJ"
1862 /*@C
1863    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1864    (block compressed row).  For good matrix assembly performance
1865    the user should preallocate the matrix storage by setting the parameters
1866    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1867    performance can be increased by more than a factor of 50.
1868 
1869    Collective on MPI_Comm
1870 
1871    Input Parameters:
1872 +  comm - MPI communicator
.  bs   - size of block
1874 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1875            This value should be the same as the local size used in creating the
1876            y vector for the matrix-vector product y = Ax.
1877 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1878            This value should be the same as the local size used in creating the
1879            x vector for the matrix-vector product y = Ax.
1880 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1881 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1882 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1883            submatrix  (same for all local rows)
1884 .  d_nnz - array containing the number of block nonzeros in the various block rows
1885            of the in diagonal portion of the local (possibly different for each block
1886            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1887 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1888            submatrix (same for all local rows).
1889 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1890            off-diagonal portion of the local submatrix (possibly different for
1891            each block row) or PETSC_NULL.
1892 
1893    Output Parameter:
1894 .  A - the matrix
1895 
1896    Options Database Keys:
1897 .   -mat_no_unroll - uses code that does not unroll the loops in the
1898                      block calculations (much slower)
1899 .   -mat_block_size - size of the blocks to use
1900 .   -mat_mpi - use the parallel matrix data structures even on one processor
1901                (defaults to using SeqBAIJ format on one processor)
1902 
1903    Notes:
1904    The user MUST specify either the local or global matrix dimensions
1905    (possibly both).
1906 
1907    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1908    than it must be used on all processors that share the object for that argument.
1909 
1910    Storage Information:
1911    For a square global matrix we define each processor's diagonal portion
1912    to be its local rows and the corresponding columns (a square submatrix);
1913    each processor's off-diagonal portion encompasses the remainder of the
1914    local matrix (a rectangular submatrix).
1915 
1916    The user can specify preallocated storage for the diagonal part of
1917    the local submatrix with either d_nz or d_nnz (not both).  Set
1918    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1919    memory allocation.  Likewise, specify preallocated storage for the
1920    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1921 
1922    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1923    the figure below we depict these three local rows and all columns (0-11).
1924 
1925 .vb
1926            0 1 2 3 4 5 6 7 8 9 10 11
1927           -------------------
1928    row 3  |  o o o d d d o o o o o o
1929    row 4  |  o o o d d d o o o o o o
1930    row 5  |  o o o d d d o o o o o o
1931           -------------------
1932 .ve
1933 
1934    Thus, any entries in the d locations are stored in the d (diagonal)
1935    submatrix, and any entries in the o locations are stored in the
1936    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1937    stored simply in the MATSEQBAIJ format for compressed row storage.
1938 
1939    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1940    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1941    In general, for PDE problems in which most nonzeros are near the diagonal,
1942    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1943    or you will get TERRIBLE performance; see the users' manual chapter on
1944    matrices.
1945 
1946    Level: intermediate
1947 
1948 .keywords: matrix, block, aij, compressed row, sparse, parallel
1949 
1950 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1951 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* On a single process (unless -mat_mpibaij or -mat_mpi forces the parallel
     format) build the cheaper sequential BAIJ matrix instead; note that
     o_nz/o_nnz are ignored on that path */
  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* create the parallel matrix header and install the MPIBAIJ operation table */
  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* argument sanity: per-row preallocation requires a known local row count,
     and at least one of the local/global sizes must be given per direction */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* if both local and global sizes were supplied, discard the global one
     and recompute it from the local sizes below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* missing global sizes: sum the local sizes over all processes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* missing local sizes: split the global block rows/cols nearly evenly;
     the first (Mbs % size) processes receive one extra block */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;      /* three tables share one allocation */
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  /* prefix-sum the gathered per-process block counts into ownership ranges */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;         /* same ranges in point (non-block) rows */
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* the diagonal block is m x n; the off-diagonal block is stored with the
     full global column width N (its columns are compacted later) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional hash-table assembly; factors <= 1.0 fall back to the default 1.39 */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the type-specific hooks looked up by the generic Mat routines */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2127 
2128 #undef __FUNC__
2129 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2130 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2131 {
2132   Mat         mat;
2133   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2134   int         ierr, len=0, flg;
2135 
2136   PetscFunctionBegin;
2137   *newmat       = 0;
2138   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2139   PLogObjectCreate(mat);
2140   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2141   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2142   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2143   mat->ops->view       = MatView_MPIBAIJ;
2144   mat->factor          = matin->factor;
2145   mat->assembled       = PETSC_TRUE;
2146   mat->insertmode      = NOT_SET_VALUES;
2147 
2148   a->m = mat->m   = oldmat->m;
2149   a->n = mat->n   = oldmat->n;
2150   a->M = mat->M   = oldmat->M;
2151   a->N = mat->N   = oldmat->N;
2152 
2153   a->bs  = oldmat->bs;
2154   a->bs2 = oldmat->bs2;
2155   a->mbs = oldmat->mbs;
2156   a->nbs = oldmat->nbs;
2157   a->Mbs = oldmat->Mbs;
2158   a->Nbs = oldmat->Nbs;
2159 
2160   a->rstart       = oldmat->rstart;
2161   a->rend         = oldmat->rend;
2162   a->cstart       = oldmat->cstart;
2163   a->cend         = oldmat->cend;
2164   a->size         = oldmat->size;
2165   a->rank         = oldmat->rank;
2166   a->donotstash   = oldmat->donotstash;
2167   a->roworiented  = oldmat->roworiented;
2168   a->rowindices   = 0;
2169   a->rowvalues    = 0;
2170   a->getrowactive = PETSC_FALSE;
2171   a->barray       = 0;
2172   a->rstart_bs    = oldmat->rstart_bs;
2173   a->rend_bs      = oldmat->rend_bs;
2174   a->cstart_bs    = oldmat->cstart_bs;
2175   a->cend_bs      = oldmat->cend_bs;
2176 
2177   /* hash table stuff */
2178   a->ht           = 0;
2179   a->hd           = 0;
2180   a->ht_size      = 0;
2181   a->ht_flag      = oldmat->ht_flag;
2182   a->ht_fact      = oldmat->ht_fact;
2183   a->ht_total_ct  = 0;
2184   a->ht_insert_ct = 0;
2185 
2186 
2187   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2188   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2189   a->cowners    = a->rowners + a->size + 2;
2190   a->rowners_bs = a->cowners + a->size + 2;
2191   PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
2192   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr);
2193   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash); CHKERRQ(ierr);
2194   if (oldmat->colmap) {
2195 #if defined (USE_CTABLE)
2196   ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
2197 #else
2198     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2199     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2200     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2201 #endif
2202   } else a->colmap = 0;
2203   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2204     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2205     PLogObjectMemory(mat,len*sizeof(int));
2206     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2207   } else a->garray = 0;
2208 
2209   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2210   PLogObjectParent(mat,a->lvec);
2211   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2212 
2213   PLogObjectParent(mat,a->Mvctx);
2214   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2215   PLogObjectParent(mat,a->A);
2216   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2217   PLogObjectParent(mat,a->B);
2218   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2219   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2220   if (flg) {
2221     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2222   }
2223   *newmat = mat;
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #include "sys.h"
2228 
2229 #undef __FUNC__
2230 #define __FUNC__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Creates a parallel MPIBAIJ matrix from a PETSc binary
   viewer.  Process 0 does all file reading (header, row lengths, column
   indices, values) and ships each process its share; the block size is
   taken from the -matload_block_size option (default 1).

   If the stored dimension is not divisible by the block size, the matrix
   is padded with extra rows/columns carrying a single 1.0 on the diagonal.
   Only square matrices are accepted; the 'type' argument is not consulted
   in this routine.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* process 0 reads and validates the binary header */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  /* broadcast the remaining header entries (global sizes, etc.) to everyone */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);    /* rows needed to fill out the last block */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows; first (Mbs % size) processes get one extra block row */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;     /* same ranges measured in point (non-block) rows */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];   /* prefix sum into ownership ranges */
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;   /* each padding row holds one entry */
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;   /* padding entries are not in the file */
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }   /* diagonal of padding */

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* mask[] marks block columns already seen within the current block row;
     masked1/masked2 record which marks to clear afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;   /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* square matrix, so the row ownership range doubles as the
             diagonal-block column range */
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }   /* unit diagonal padding */

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;   /* note: i == size-1 after the loop above */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2455 }
2456 
2457 
2458 
2459 #undef __FUNC__
2460 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2461 /*@
2462    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2463 
2464    Input Parameters:
2465 .  mat  - the matrix
2466 .  fact - factor
2467 
2468    Collective on Mat
2469 
2470    Level: advanced
2471 
2472   Notes:
2473    This can also be set by the command line option: -mat_use_hash_table fact
2474 
2475 .keywords: matrix, hashtable, factor, HT
2476 
2477 .seealso: MatSetOption()
2478 @*/
2479 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2480 {
2481   Mat_MPIBAIJ *baij;
2482 
2483   PetscFunctionBegin;
2484   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2485   if (mat->type != MATMPIBAIJ) {
2486       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2487   }
2488   baij = (Mat_MPIBAIJ*) mat->data;
2489   baij->ht_fact = fact;
2490   PetscFunctionReturn(0);
2491 }
2492