xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c38d4ed214221df9ea04de46f7761bef149d00ff)
1 /*$Id: mpibaij.c,v 1.183 1999/11/05 14:45:37 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 extern int MatSetUpMultiply_MPIBAIJ(Mat);
7 extern int DisAssemble_MPIBAIJ(Mat);
8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 extern int MatPrintHelp_SeqBAIJ(Mat);
16 
17 EXTERN_C_BEGIN
18 #undef __FUNC__
19 #define __FUNC__ "MatStoreValues_MPIBAIJ"
20 int MatStoreValues_MPIBAIJ(Mat mat)
21 {
22   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
23   int         ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
27   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 EXTERN_C_END
31 
32 EXTERN_C_BEGIN
33 #undef __FUNC__
34 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
35 int MatRetrieveValues_MPIBAIJ(Mat mat)
36 {
37   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
38   int         ierr;
39 
40   PetscFunctionBegin;
41   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
42   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
43   PetscFunctionReturn(0);
44 }
45 EXTERN_C_END
46 
/*
     Local utility routine that creates a mapping from the global (block)
   column number to the corresponding local column number in the
   off-diagonal part (B) of the local matrix storage.  This is done in a
   non-scalable way, since the length of colmap equals the global number
   of block columns of the matrix.
*/
53 #undef __FUNC__
54 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
55 static int CreateColmap_MPIBAIJ_Private(Mat mat)
56 {
57   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
58   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
59   int         nbs = B->nbs,i,bs=B->bs,ierr;
60 
61   PetscFunctionBegin;
62 #if defined (PETSC_USE_CTABLE)
63   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
64   for ( i=0; i<nbs; i++ ){
65     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
66   }
67 #else
68   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
69   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
70   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
71   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
72 #endif
73   PetscFunctionReturn(0);
74 }
75 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - inlined insertion of a
   single scalar entry into the local diagonal block A.

   row,col are local *point* (scalar) indices; the macro derives the block
   row/column (brow,bcol) and the offset (ridx,cidx) inside the bs x bs block.
   A binary search narrows the sorted block-column list of the row to at most
   3 candidates, which are then scanned linearly.  If the entry exists it is
   set or accumulated according to addv; otherwise, subject to a->nonew, a new
   block is inserted, enlarging the row storage by CHUNKSIZE blocks when full
   (reallocating i/j/a into one single malloc).  Relies on locals declared in
   MatSetValues_MPIBAIJ: rp,ap,bap,rmax,nrow,low,high,t,_i,ii,N,brow,bcol,
   ridx,cidx,bs,bs2,ierr and the cached aj,ai,aa,aimax,ailen pointers, which
   are refreshed here after a reallocation.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                          len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
155 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - inlined insertion of a
   single scalar entry into the local off-diagonal block B.

   Identical in structure to MatSetValues_SeqBAIJ_A_Private above but operates
   on the B matrix and its cached pointers (bj,bi,ba,bimax,bilen).  row,col
   are local point indices (col already translated through colmap by the
   caller).  Relies on the same locals declared in MatSetValues_MPIBAIJ.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
233 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Sets or adds an m x n logically-dense array of
   individual scalar entries (im/in are global *point* indices) into the
   parallel BAIJ matrix.

   Locally owned rows go directly into the diagonal block A or the
   off-diagonal block B through the inlined macros above; rows owned by
   other processes are stashed for communication at assembly time (unless
   baij->donotstash is set, in which case they are dropped).  Negative row
   or column indices are silently skipped.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;  /* negative row indices are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned by this process */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the local diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* off-diagonal block B: translate global column to local B column */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            /* colmap miss (col < 0) for a new block column: fall back to the
               unassembled form so B can accept the new nonzero */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash and send at assembly time */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
316 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ - Sets or adds an m x n array of bs x bs
   blocks; im/in are global *block* row/column indices and v holds
   m*n*bs2 scalars.

   Each block is repacked (if necessary) into the contiguous scratch buffer
   baij->barray and forwarded to MatSetValuesBlocked_SeqBAIJ on A or B.
   Off-process block rows are placed into the block stash (mat->bstash).
*/
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs x bs scratch block used for repacking */
  if(!barray) {
    baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
  }

  /* stride (in scalars) between consecutive rows of one block within v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;  /* negative block-row indices are skipped */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row owned by this process */
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for ( ii=0; ii<bs; ii++,value+=stepval ) {
            for (jj=0; jj<bs; jj++ ) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the repacked block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal part A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal part B: translate global block column via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* colmap miss for a new block column: revert to unassembled form */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash in the block stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
/* Multiplicative hashing: HASH_KEY is (approximately) the fractional part of
   the golden ratio, used as the multiplier.  HASH(size,key,tmp) maps key into
   [0,size) by taking the fractional part of key*HASH_KEY; tmp is a double
   scratch variable supplied by the caller, avoiding a call to fmod(). */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues for a fixed
   nonzero pattern: every (row,col) entry must already have a slot in the
   table built by MatCreateHashTable_MPIBAIJ_Private(), otherwise an error
   is raised.  Lookup probes linearly from the hashed position, wrapping
   around the table once.  Off-process rows are stashed as usual.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the (block-row,block-col)
           pair as brow*Nbs + bcol + 1 (the +1 keeps 0 as "empty slot") */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* instrumented probe loop: counts total and per-insert probes */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe from h1 to end, then wrap from 0 back to h1 */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-process row: stash for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
496 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of the blocked
   MatSetValues: im/in are global *block* indices, v holds m*n*bs2 scalars,
   and every (block-row,block-col) pair must already have a slot in the hash
   table (fixed nonzero pattern).  The located bs x bs destination block is
   filled column-by-column directly from v, honoring roworiented/stepval
   layout.  Off-process block rows are stashed in the block stash.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride (in scalars) between consecutive rows of one block within v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table: key = brow*Nbs + bcol + 1 */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* instrumented probe loop: counts total and per-insert probes */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe from h1 to end, then wrap from 0 back to h1 */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];  /* address of the destination bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-process block row: stash in the block stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
610 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves an m x n array of scalar entries (global
   point indices) into v.  Only locally owned rows are supported; requesting
   a row owned by another process raises PETSC_ERR_SUP.  Columns in the
   diagonal range are read from A; other columns are translated through
   colmap and read from B, with 0.0 returned for entries whose block is not
   present in B.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the local diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate via colmap (built on demand) */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block not present in B: the entry is an (unstored) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
654 
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes a norm of the parallel matrix.  On one process
   the request is forwarded to the sequential MatNorm on A.  In parallel only
   NORM_FROBENIUS is supported: each process sums the squares of its stored
   values in A and B, the partial sums are combined with MPI_Allreduce, and
   the square root is taken.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
  int        ierr, i,bs2=baij->bs2;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squares over the diagonal block A (nz counts bs2-sized blocks) */
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* sum of squares over the off-diagonal block B */
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
694 
695 
/*
  Creates the hash table and fills it in.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
703 #undef __FUNC__
704 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
705 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
706 {
707   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
708   Mat         A = baij->A, B=baij->B;
709   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
710   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
711   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
712   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
713   int         *HT,key;
714   Scalar      **HD;
715   double      tmp;
716 #if defined(PETSC_USE_BOPT_g)
717   int         ct=0,max=0;
718 #endif
719 
720   PetscFunctionBegin;
721   baij->ht_size=(int)(factor*nz);
722   size = baij->ht_size;
723 
724   if (baij->ht) {
725     PetscFunctionReturn(0);
726   }
727 
728   /* Allocate Memory for Hash Table */
729   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
730   baij->ht = (int*)(baij->hd + size);
731   HD = baij->hd;
732   HT = baij->ht;
733 
734 
735   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
736 
737 
738   /* Loop Over A */
739   for ( i=0; i<a->mbs; i++ ) {
740     for ( j=ai[i]; j<ai[i+1]; j++ ) {
741       row = i+rstart;
742       col = aj[j]+cstart;
743 
744       key = row*Nbs + col + 1;
745       h1  = HASH(size,key,tmp);
746       for ( k=0; k<size; k++ ){
747         if (HT[(h1+k)%size] == 0.0) {
748           HT[(h1+k)%size] = key;
749           HD[(h1+k)%size] = a->a + j*bs2;
750           break;
751 #if defined(PETSC_USE_BOPT_g)
752         } else {
753           ct++;
754 #endif
755         }
756       }
757 #if defined(PETSC_USE_BOPT_g)
758       if (k> max) max = k;
759 #endif
760     }
761   }
762   /* Loop Over B */
763   for ( i=0; i<b->mbs; i++ ) {
764     for ( j=bi[i]; j<bi[i+1]; j++ ) {
765       row = i+rstart;
766       col = garray[bj[j]];
767       key = row*Nbs + col + 1;
768       h1  = HASH(size,key,tmp);
769       for ( k=0; k<size; k++ ){
770         if (HT[(h1+k)%size] == 0.0) {
771           HT[(h1+k)%size] = key;
772           HD[(h1+k)%size] = b->a + j*bs2;
773           break;
774 #if defined(PETSC_USE_BOPT_g)
775         } else {
776           ct++;
777 #endif
778         }
779       }
780 #if defined(PETSC_USE_BOPT_g)
781       if (k> max) max = k;
782 #endif
783     }
784   }
785 
786   /* Print Summary */
787 #if defined(PETSC_USE_BOPT_g)
788   for ( i=0,j=0; i<size; i++) {
789     if (HT[i]) {j++;}
790   }
791   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((double)(ct+j))/j,max);
792 #endif
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNC__
797 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
798 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
799 {
800   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
801   int         ierr,nstash,reallocs;
802   InsertMode  addv;
803 
804   PetscFunctionBegin;
805   if (baij->donotstash) {
806     PetscFunctionReturn(0);
807   }
808 
809   /* make sure all processors are either in INSERTMODE or ADDMODE */
810   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
811   if (addv == (ADD_VALUES|INSERT_VALUES)) {
812     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
813   }
814   mat->insertmode = addv; /* in case this processor had no cache */
815 
816   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
817   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
818   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
819   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822   PetscFunctionReturn(0);
823 }
824 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly of a parallel BAIJ matrix.
   Drains both stashes (point and block) of entries received from other
   processors and inserts them locally, assembles the sequential diagonal
   (A) and off-diagonal (B) blocks, coordinates disassembly across
   processors when the nonzero structure may have changed, and on final
   assembly optionally builds the hash table used by the _HT SetValues
   variants.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each message holds (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block occupies bs2 scalars, hence the i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    /* restore the row/column orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff some processor disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter used by MatMult et al. */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* debug builds: report and reset the hash-table search statistics */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if the user requested a hash table and none exists yet, build it and
     switch the SetValues entry points to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* drop the MatGetRow() work buffer; it is rebuilt on demand */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
922 
923 /*
924 #undef __FUNC__
925 #define __FUNC__ "MatView_MPIBAIJ_Binary"
926 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
927 {
928   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
929   int          ierr;
930 
931   PetscFunctionBegin;
932   if (baij->size == 1) {
933     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
934   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
935   PetscFunctionReturn(0);
936 }
937 */
938 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views a parallel BAIJ matrix.
   For ASCII "info" formats it prints per-processor statistics only; for
   everything else it gathers the whole matrix onto processor 0 (via a
   temporary MPIBAIJ matrix that lives entirely on rank 0) and views it
   there with a singleton viewer.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* statistics only; note block counts are scaled by bs to scalar nz */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* uniprocessor: the diagonal block is the whole matrix */
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* rank 0 owns all M x N entries of the temporary matrix; others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqBAIJ*) baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      /* rvals = the bs global (scalar) row numbers of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        /* blocks are stored column-by-column: each inner pass sets one
           scalar column of the bs x bs block */
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        /* garray maps compressed local column indices to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1043 
1044 
1045 
1046 #undef __FUNC__
1047 #define __FUNC__ "MatView_MPIBAIJ"
1048 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1049 {
1050   int        ierr;
1051   PetscTruth isascii,isdraw,issocket,isbinary;
1052 
1053   PetscFunctionBegin;
1054   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1055   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1056   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1057   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1058   if (isascii || isdraw || issocket || isbinary) {
1059     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1060   } else {
1061     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees everything owned by a parallel BAIJ matrix:
   the mappings attached to the Mat header, both stashes, the sequential
   diagonal (A) and off-diagonal (B) blocks, the scatter/communication
   objects, assorted work arrays, and finally the header itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  /* stashes used for off-processor entries during assembly */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  /* NOTE(review): assumes rowners was allocated as a single block that also
     covers the related ownership arrays -- confirm against the creation code */
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1113 
1114 #undef __FUNC__
1115 #define __FUNC__ "MatMult_MPIBAIJ"
1116 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1117 {
1118   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1119   int         ierr, nt;
1120 
1121   PetscFunctionBegin;
1122   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1123   if (nt != a->n) {
1124     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1125   }
1126   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1127   if (nt != a->m) {
1128     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1129   }
1130   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1131   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1132   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1133   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1134   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNC__
1139 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1140 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1141 {
1142   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1143   int        ierr;
1144 
1145   PetscFunctionBegin;
1146   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1147   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1148   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1149   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNC__
1154 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1155 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1156 {
1157   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1158   int         ierr;
1159 
1160   PetscFunctionBegin;
1161   /* do nondiagonal part */
1162   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1163   /* send it on its way */
1164   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1165   /* do local part */
1166   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1167   /* receive remote parts: note this assumes the values are not actually */
1168   /* inserted in yy until the next line, which is true for my implementation*/
1169   /* but is not perhaps always true. */
1170   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNC__
1175 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1176 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1177 {
1178   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1179   int         ierr;
1180 
1181   PetscFunctionBegin;
1182   /* do nondiagonal part */
1183   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1184   /* send it on its way */
1185   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1186   /* do local part */
1187   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1188   /* receive remote parts: note this assumes the values are not actually */
1189   /* inserted in yy until the next line, which is true for my implementation*/
1190   /* but is not perhaps always true. */
1191   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 /*
1196   This only works correctly for square matrices where the subblock A->A is the
1197    diagonal block
1198 */
1199 #undef __FUNC__
1200 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1201 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1202 {
1203   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1204   int         ierr;
1205 
1206   PetscFunctionBegin;
1207   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1208   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNC__
1213 #define __FUNC__ "MatScale_MPIBAIJ"
1214 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1215 {
1216   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1217   int         ierr;
1218 
1219   PetscFunctionBegin;
1220   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1221   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNC__
1226 #define __FUNC__ "MatGetSize_MPIBAIJ"
1227 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1228 {
1229   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1230 
1231   PetscFunctionBegin;
1232   if (m) *m = mat->M;
1233   if (n) *n = mat->N;
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNC__
1238 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1239 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1240 {
1241   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1242 
1243   PetscFunctionBegin;
1244   *m = mat->m; *n = mat->n;
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNC__
1249 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1250 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1251 {
1252   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1253 
1254   PetscFunctionBegin;
1255   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1256   PetscFunctionReturn(0);
1257 }
1258 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned global row of the matrix.
   The row is assembled by fetching the corresponding local rows of the
   diagonal (A) and off-diagonal (B) blocks and merging them into a single
   list sorted by global column, using the cached work buffers
   mat->rowvalues / mat->rowindices (allocated here on first use).
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time; MatRestoreRow clears the flag */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* combined block count of row i across both blocks */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array after them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  /* NOTE(review): SETERRQ here has no trailing semicolon; the macro supplies
     a complete statement, so the following line is NOT part of the if */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* pass null pointers down for outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global block column < cstart come first ... */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all of A (columns within [cstart,cend)) ... */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        /* ... then the remaining B entries (columns >= cend) */
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed while copying values above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: find imark here instead */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A columns are local: shift by the global column start */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1339 
1340 #undef __FUNC__
1341 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1342 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1343 {
1344   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (baij->getrowactive == PETSC_FALSE) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1349   }
1350   baij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNC__
1355 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1356 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1357 {
1358   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1359 
1360   PetscFunctionBegin;
1361   *bs = baij->bs;
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNC__
1366 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1367 int MatZeroEntries_MPIBAIJ(Mat A)
1368 {
1369   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1370   int         ierr;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1374   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNC__
1379 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1380 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1381 {
1382   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1383   Mat         A = a->A, B = a->B;
1384   int         ierr;
1385   double      isend[5], irecv[5];
1386 
1387   PetscFunctionBegin;
1388   info->block_size     = (double)a->bs;
1389   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1390   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1391   isend[3] = info->memory;  isend[4] = info->mallocs;
1392   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1393   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1394   isend[3] += info->memory;  isend[4] += info->mallocs;
1395   if (flag == MAT_LOCAL) {
1396     info->nz_used      = isend[0];
1397     info->nz_allocated = isend[1];
1398     info->nz_unneeded  = isend[2];
1399     info->memory       = isend[3];
1400     info->mallocs      = isend[4];
1401   } else if (flag == MAT_GLOBAL_MAX) {
1402     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1403     info->nz_used      = irecv[0];
1404     info->nz_allocated = irecv[1];
1405     info->nz_unneeded  = irecv[2];
1406     info->memory       = irecv[3];
1407     info->mallocs      = irecv[4];
1408   } else if (flag == MAT_GLOBAL_SUM) {
1409     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1410     info->nz_used      = irecv[0];
1411     info->nz_allocated = irecv[1];
1412     info->nz_unneeded  = irecv[2];
1413     info->memory       = irecv[3];
1414     info->mallocs      = irecv[4];
1415   } else {
1416     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1417   }
1418   info->rows_global       = (double)a->M;
1419   info->columns_global    = (double)a->N;
1420   info->rows_local        = (double)a->m;
1421   info->columns_local     = (double)a->N;
1422   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1423   info->fill_ratio_needed = 0;
1424   info->factor_mallocs    = 0;
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNC__
1429 #define __FUNC__ "MatSetOption_MPIBAIJ"
1430 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1431 {
1432   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1433   int         ierr;
1434 
1435   PetscFunctionBegin;
1436   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1437       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1438       op == MAT_COLUMNS_UNSORTED ||
1439       op == MAT_COLUMNS_SORTED ||
1440       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1441       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1442         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1443         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1444   } else if (op == MAT_ROW_ORIENTED) {
1445         a->roworiented = 1;
1446         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1447         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1448   } else if (op == MAT_ROWS_SORTED ||
1449              op == MAT_ROWS_UNSORTED ||
1450              op == MAT_SYMMETRIC ||
1451              op == MAT_STRUCTURALLY_SYMMETRIC ||
1452              op == MAT_YES_NEW_DIAGONALS ||
1453              op == MAT_USE_HASH_TABLE) {
1454     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1455   } else if (op == MAT_COLUMN_ORIENTED) {
1456     a->roworiented = 0;
1457     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1458     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1459   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1460     a->donotstash = 1;
1461   } else if (op == MAT_NO_NEW_DIAGONALS) {
1462     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1463   } else if (op == MAT_USE_HASH_TABLE) {
1464     a->ht_flag = 1;
1465   } else {
1466     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNC__
1472 #define __FUNC__ "MatTranspose_MPIBAIJ("
1473 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1474 {
1475   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1476   Mat_SeqBAIJ *Aloc;
1477   Mat        B;
1478   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1479   int        bs=baij->bs,mbs=baij->mbs;
1480   Scalar     *a;
1481 
1482   PetscFunctionBegin;
1483   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1484   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1485 CHKERRQ(ierr);
1486 
1487   /* copy over the A part */
1488   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1489   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1490   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1491 
1492   for ( i=0; i<mbs; i++ ) {
1493     rvals[0] = bs*(baij->rstart + i);
1494     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1495     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1496       col = (baij->cstart+aj[j])*bs;
1497       for (k=0; k<bs; k++ ) {
1498         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1499         col++; a += bs;
1500       }
1501     }
1502   }
1503   /* copy over the B part */
1504   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1505   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1506   for ( i=0; i<mbs; i++ ) {
1507     rvals[0] = bs*(baij->rstart + i);
1508     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1509     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1510       col = baij->garray[aj[j]]*bs;
1511       for (k=0; k<bs; k++ ) {
1512         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1513         col++; a += bs;
1514       }
1515     }
1516   }
1517   ierr = PetscFree(rvals);CHKERRQ(ierr);
1518   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1519   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1520 
1521   if (matout != PETSC_NULL) {
1522     *matout = B;
1523   } else {
1524     PetscOps *Abops;
1525     MatOps   Aops;
1526 
1527     /* This isn't really an in-place transpose .... but free data structures from baij */
1528     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
1529     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1530     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1531 #if defined (PETSC_USE_CTABLE)
1532     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1533 #else
1534     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1535 #endif
1536     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1537     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1538     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1539     ierr = PetscFree(baij);CHKERRQ(ierr);
1540 
1541     /*
1542        This is horrible, horrible code. We need to keep the
1543       A pointers for the bops and ops but copy everything
1544       else from C.
1545     */
1546     Abops   = A->bops;
1547     Aops    = A->ops;
1548     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1549     A->bops = Abops;
1550     A->ops  = Aops;
1551 
1552     PetscHeaderDestroy(B);
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr).
   Either ll or rr may be null. The scatter of rr's off-processor entries
   is overlapped with the left scaling and the diagonal-block scaling.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat         a = baij->A, b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  /* s2 = local rows (checked against ll), s3 = local columns (against rr) */
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local, so ll applies directly) */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1589 
1590 #undef __FUNC__
1591 #define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
     Zeros the rows of the parallel matrix listed (by GLOBAL unblocked/point
   index) in the index set "is".  Each process may list rows owned by any
   process, so the row indices are first routed to their owning processes
   with a nonblocking MPI rendezvous (Irecv/Isend), then each process zeros
   its own local rows.  If "diag" is non-null the diagonal entry of every
   zeroed row is set to *diag.  Collective on A.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = how many of my listed rows belong to proc j;
     procs[j]  = 1 iff I send anything to proc j (second half of same malloc) */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    /* owners[] holds block-row ownership ranges; scale by bs to compare
       against the unblocked (point) row index */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  /* PetscMaxSum_Op computes, per destination, max over the first half of the
     buffer and sum over the second half: work[rank] = longest incoming
     message, work[size+rank] = number of incoming messages */
  work   = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr   = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  /* each receive slot is nmax ints wide; senders are anonymous (ANY_SOURCE) */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination; this pass advances starts[],
     so it is recomputed below before the actual sends */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* base = first (point) row owned by this process, used to localize
     incoming global indices */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits); CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* flatten all received global indices into lrows[], shifted to local
     numbering */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert the diagonal
       entries one by one (requires new-nonzero insertion to be allowed) */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1741 
1742 #undef __FUNC__
1743 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1744 int MatPrintHelp_MPIBAIJ(Mat A)
1745 {
1746   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1747   MPI_Comm    comm = A->comm;
1748   static int  called = 0;
1749   int         ierr;
1750 
1751   PetscFunctionBegin;
1752   if (!a->rank) {
1753     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1754   }
1755   if (called) {PetscFunctionReturn(0);} else called = 1;
1756   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1757   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNC__
1762 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1763 int MatSetUnfactored_MPIBAIJ(Mat A)
1764 {
1765   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1766   int         ierr;
1767 
1768   PetscFunctionBegin;
1769   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1774 
1775 #undef __FUNC__
1776 #define __FUNC__ "MatEqual_MPIBAIJ"
1777 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1778 {
1779   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1780   Mat         a, b, c, d;
1781   PetscTruth  flg;
1782   int         ierr;
1783 
1784   PetscFunctionBegin;
1785   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1786   a = matA->A; b = matA->B;
1787   c = matB->A; d = matB->B;
1788 
1789   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1790   if (flg == PETSC_TRUE) {
1791     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1792   }
1793   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 /* -------------------------------------------------------------------*/
/*
   Operation (dispatch) table for the MPIBAIJ matrix type.  The slot order
   is fixed by struct _MatOps, so entries are positional; a 0 marks an
   operation this type does not implement.  This table is copied into each
   new matrix's ops in MatCreateMPIBAIJ() and MatDuplicate_MPIBAIJ().
   Do NOT reorder entries.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1865 
1866 
1867 EXTERN_C_BEGIN
1868 #undef __FUNC__
1869 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1870 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1871 {
1872   PetscFunctionBegin;
1873   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1874   *iscopy = PETSC_FALSE;
1875   PetscFunctionReturn(0);
1876 }
1877 EXTERN_C_END
1878 
1879 #undef __FUNC__
1880 #define __FUNC__ "MatCreateMPIBAIJ"
1881 /*@C
1882    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1883    (block compressed row).  For good matrix assembly performance
1884    the user should preallocate the matrix storage by setting the parameters
1885    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1886    performance can be increased by more than a factor of 50.
1887 
1888    Collective on MPI_Comm
1889 
1890    Input Parameters:
1891 +  comm - MPI communicator
.  bs   - size of block
1893 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1894            This value should be the same as the local size used in creating the
1895            y vector for the matrix-vector product y = Ax.
1896 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1897            This value should be the same as the local size used in creating the
1898            x vector for the matrix-vector product y = Ax.
1899 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1900 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1901 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1902            submatrix  (same for all local rows)
1903 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
1905            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1906 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1907            submatrix (same for all local rows).
1908 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1909            off-diagonal portion of the local submatrix (possibly different for
1910            each block row) or PETSC_NULL.
1911 
1912    Output Parameter:
1913 .  A - the matrix
1914 
1915    Options Database Keys:
1916 .   -mat_no_unroll - uses code that does not unroll the loops in the
1917                      block calculations (much slower)
1918 .   -mat_block_size - size of the blocks to use
1919 .   -mat_mpi - use the parallel matrix data structures even on one processor
1920                (defaults to using SeqBAIJ format on one processor)
1921 
1922    Notes:
1923    The user MUST specify either the local or global matrix dimensions
1924    (possibly both).
1925 
1926    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1928 
1929    Storage Information:
1930    For a square global matrix we define each processor's diagonal portion
1931    to be its local rows and the corresponding columns (a square submatrix);
1932    each processor's off-diagonal portion encompasses the remainder of the
1933    local matrix (a rectangular submatrix).
1934 
1935    The user can specify preallocated storage for the diagonal part of
1936    the local submatrix with either d_nz or d_nnz (not both).  Set
1937    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1938    memory allocation.  Likewise, specify preallocated storage for the
1939    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1940 
1941    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1942    the figure below we depict these three local rows and all columns (0-11).
1943 
1944 .vb
1945            0 1 2 3 4 5 6 7 8 9 10 11
1946           -------------------
1947    row 3  |  o o o d d d o o o o o o
1948    row 4  |  o o o d d d o o o o o o
1949    row 5  |  o o o d d d o o o o o o
1950           -------------------
1951 .ve
1952 
1953    Thus, any entries in the d locations are stored in the d (diagonal)
1954    submatrix, and any entries in the o locations are stored in the
1955    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1956    stored simply in the MATSEQBAIJ format for compressed row storage.
1957 
1958    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1959    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1960    In general, for PDE problems in which most nonzeros are near the diagonal,
1961    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1962    or you will get TERRIBLE performance; see the users' manual chapter on
1963    matrices.
1964 
1965    Level: intermediate
1966 
1967 .keywords: matrix, block, aij, compressed row, sparse, parallel
1968 
1969 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1970 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
  PetscTruth   flag1,flag2,flg;

  PetscFunctionBegin;
  /* the command-line option overrides the blocksize passed by the caller */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* ---- validate blocksize and preallocation arguments ---- */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* on one process (unless -mat_mpibaij/-mat_mpi is given) fall back to the
     sequential BAIJ format; note o_nz/o_nnz are irrelevant in that case */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* ---- create the parallel matrix header and install the op table ---- */
  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* ---- resolve local/global sizes; at least one of each pair required ---- */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both are given, the local sizes win and the global ones are recomputed */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* global sizes are the sum of the local sizes across the communicator */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* local sizes derived from the global ones: distribute block rows/cols
     as evenly as possible, lower ranks taking the remainder */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation carved into three tables: rowners (block rows),
     cowners (block cols), rowners_bs (point rows) */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* gather per-rank block-row counts, then prefix-sum into ownership ranges */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* ---- create the two local pieces: A = diagonal block (m x n),
     B = off-diagonal block stored with full global column space (m x N) ---- */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional hash-table assembly; -mat_use_hash_table may also carry a
     load factor (> 1.0, default 1.39) */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* compose type-specific operations that are looked up by name */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2160 
2161 #undef __FUNC__
2162 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2163 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2164 {
2165   Mat         mat;
2166   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2167   int         ierr, len=0;
2168   PetscTruth  flg;
2169 
2170   PetscFunctionBegin;
2171   *newmat       = 0;
2172   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2173   PLogObjectCreate(mat);
2174   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2175   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2176   mat->ops->destroy = MatDestroy_MPIBAIJ;
2177   mat->ops->view    = MatView_MPIBAIJ;
2178   mat->factor       = matin->factor;
2179   mat->assembled    = PETSC_TRUE;
2180   mat->insertmode   = NOT_SET_VALUES;
2181 
2182   a->m = mat->m   = oldmat->m;
2183   a->n = mat->n   = oldmat->n;
2184   a->M = mat->M   = oldmat->M;
2185   a->N = mat->N   = oldmat->N;
2186 
2187   a->bs  = oldmat->bs;
2188   a->bs2 = oldmat->bs2;
2189   a->mbs = oldmat->mbs;
2190   a->nbs = oldmat->nbs;
2191   a->Mbs = oldmat->Mbs;
2192   a->Nbs = oldmat->Nbs;
2193 
2194   a->rstart       = oldmat->rstart;
2195   a->rend         = oldmat->rend;
2196   a->cstart       = oldmat->cstart;
2197   a->cend         = oldmat->cend;
2198   a->size         = oldmat->size;
2199   a->rank         = oldmat->rank;
2200   a->donotstash   = oldmat->donotstash;
2201   a->roworiented  = oldmat->roworiented;
2202   a->rowindices   = 0;
2203   a->rowvalues    = 0;
2204   a->getrowactive = PETSC_FALSE;
2205   a->barray       = 0;
2206   a->rstart_bs    = oldmat->rstart_bs;
2207   a->rend_bs      = oldmat->rend_bs;
2208   a->cstart_bs    = oldmat->cstart_bs;
2209   a->cend_bs      = oldmat->cend_bs;
2210 
2211   /* hash table stuff */
2212   a->ht           = 0;
2213   a->hd           = 0;
2214   a->ht_size      = 0;
2215   a->ht_flag      = oldmat->ht_flag;
2216   a->ht_fact      = oldmat->ht_fact;
2217   a->ht_total_ct  = 0;
2218   a->ht_insert_ct = 0;
2219 
2220 
2221   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2222   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2223   a->cowners    = a->rowners + a->size + 2;
2224   a->rowners_bs = a->cowners + a->size + 2;
2225   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2226   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2227   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2228   if (oldmat->colmap) {
2229 #if defined (PETSC_USE_CTABLE)
2230   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2231 #else
2232     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2233     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2234     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2235 #endif
2236   } else a->colmap = 0;
2237   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2238     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2239     PLogObjectMemory(mat,len*sizeof(int));
2240     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2241   } else a->garray = 0;
2242 
2243   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2244   PLogObjectParent(mat,a->lvec);
2245   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2246 
2247   PLogObjectParent(mat,a->Mvctx);
2248   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2249   PLogObjectParent(mat,a->A);
2250   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2251   PLogObjectParent(mat,a->B);
2252   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2253   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2254   if (flg) {
2255     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2256   }
2257   *newmat = mat;
2258   PetscFunctionReturn(0);
2259 }
2260 
2261 #include "sys.h"
2262 
2263 #undef __FUNC__
2264 #define __FUNC__ "MatLoad_MPIBAIJ"
2265 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2266 {
2267   Mat          A;
2268   int          i, nz, ierr, j,rstart, rend, fd;
2269   Scalar       *vals,*buf;
2270   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2271   MPI_Status   status;
2272   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2273   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2274   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2275   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2276   int          dcount,kmax,k,nzcount,tmp;
2277 
2278   PetscFunctionBegin;
2279   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2280 
2281   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2282   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2283   if (!rank) {
2284     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2285     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2286     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2287     if (header[3] < 0) {
2288       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2289     }
2290   }
2291 
2292   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2293   M = header[1]; N = header[2];
2294 
2295   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2296 
2297   /*
2298      This code adds extra rows to make sure the number of rows is
2299      divisible by the blocksize
2300   */
2301   Mbs        = M/bs;
2302   extra_rows = bs - M + bs*(Mbs);
2303   if (extra_rows == bs) extra_rows = 0;
2304   else                  Mbs++;
2305   if (extra_rows &&!rank) {
2306     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2307   }
2308 
2309   /* determine ownership of all rows */
2310   mbs = Mbs/size + ((Mbs % size) > rank);
2311   m   = mbs * bs;
2312   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2313   browners = rowners + size + 1;
2314   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2315   rowners[0] = 0;
2316   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2317   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2318   rstart = rowners[rank];
2319   rend   = rowners[rank+1];
2320 
2321   /* distribute row lengths to all processors */
2322   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2323   if (!rank) {
2324     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2325     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2326     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2327     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2328     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2329     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2330     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2331   } else {
2332     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2333   }
2334 
2335   if (!rank) {
2336     /* calculate the number of nonzeros on each processor */
2337     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2338     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2339     for ( i=0; i<size; i++ ) {
2340       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2341         procsnz[i] += rowlengths[j];
2342       }
2343     }
2344     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2345 
2346     /* determine max buffer needed and allocate it */
2347     maxnz = 0;
2348     for ( i=0; i<size; i++ ) {
2349       maxnz = PetscMax(maxnz,procsnz[i]);
2350     }
2351     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2352 
2353     /* read in my part of the matrix column indices  */
2354     nz = procsnz[0];
2355     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2356     mycols = ibuf;
2357     if (size == 1)  nz -= extra_rows;
2358     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2359     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2360 
2361     /* read in every ones (except the last) and ship off */
2362     for ( i=1; i<size-1; i++ ) {
2363       nz   = procsnz[i];
2364       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2365       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2366     }
2367     /* read in the stuff for the last proc */
2368     if ( size != 1 ) {
2369       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2370       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2371       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2372       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2373     }
2374     ierr = PetscFree(cols);CHKERRQ(ierr);
2375   } else {
2376     /* determine buffer space needed for message */
2377     nz = 0;
2378     for ( i=0; i<m; i++ ) {
2379       nz += locrowlens[i];
2380     }
2381     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2382     mycols = ibuf;
2383     /* receive message of column indices*/
2384     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2385     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2386     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2387   }
2388 
2389   /* loop over local rows, determining number of off diagonal entries */
2390   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2391   odlens = dlens + (rend-rstart);
2392   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2393   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2394   masked1 = mask    + Mbs;
2395   masked2 = masked1 + Mbs;
2396   rowcount = 0; nzcount = 0;
2397   for ( i=0; i<mbs; i++ ) {
2398     dcount  = 0;
2399     odcount = 0;
2400     for ( j=0; j<bs; j++ ) {
2401       kmax = locrowlens[rowcount];
2402       for ( k=0; k<kmax; k++ ) {
2403         tmp = mycols[nzcount++]/bs;
2404         if (!mask[tmp]) {
2405           mask[tmp] = 1;
2406           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2407           else masked1[dcount++] = tmp;
2408         }
2409       }
2410       rowcount++;
2411     }
2412 
2413     dlens[i]  = dcount;
2414     odlens[i] = odcount;
2415 
2416     /* zero out the mask elements we set */
2417     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2418     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2419   }
2420 
2421   /* create our matrix */
2422   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2423   A = *newmat;
2424   MatSetOption(A,MAT_COLUMNS_SORTED);
2425 
2426   if (!rank) {
2427     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2428     /* read in my part of the matrix numerical values  */
2429     nz = procsnz[0];
2430     vals = buf;
2431     mycols = ibuf;
2432     if (size == 1)  nz -= extra_rows;
2433     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2434     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2435 
2436     /* insert into matrix */
2437     jj      = rstart*bs;
2438     for ( i=0; i<m; i++ ) {
2439       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2440       mycols += locrowlens[i];
2441       vals   += locrowlens[i];
2442       jj++;
2443     }
2444     /* read in other processors (except the last one) and ship out */
2445     for ( i=1; i<size-1; i++ ) {
2446       nz   = procsnz[i];
2447       vals = buf;
2448       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2449       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2450     }
2451     /* the last proc */
2452     if ( size != 1 ){
2453       nz   = procsnz[i] - extra_rows;
2454       vals = buf;
2455       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2456       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2457       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2458     }
2459     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2460   } else {
2461     /* receive numeric values */
2462     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2463 
2464     /* receive message of values*/
2465     vals   = buf;
2466     mycols = ibuf;
2467     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2468     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2469     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2470 
2471     /* insert into matrix */
2472     jj      = rstart*bs;
2473     for ( i=0; i<m; i++ ) {
2474       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2475       mycols += locrowlens[i];
2476       vals   += locrowlens[i];
2477       jj++;
2478     }
2479   }
2480   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2481   ierr = PetscFree(buf);CHKERRQ(ierr);
2482   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2483   ierr = PetscFree(rowners);CHKERRQ(ierr);
2484   ierr = PetscFree(dlens);CHKERRQ(ierr);
2485   ierr = PetscFree(mask);CHKERRQ(ierr);
2486   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2487   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #undef __FUNC__
2492 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2493 /*@
2494    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2495 
2496    Input Parameters:
2497 .  mat  - the matrix
2498 .  fact - factor
2499 
2500    Collective on Mat
2501 
2502    Level: advanced
2503 
2504   Notes:
2505    This can also be set by the command line option: -mat_use_hash_table fact
2506 
2507 .keywords: matrix, hashtable, factor, HT
2508 
2509 .seealso: MatSetOption()
2510 @*/
2511 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2512 {
2513   Mat_MPIBAIJ *baij;
2514 
2515   PetscFunctionBegin;
2516   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2517   if (mat->type != MATMPIBAIJ) {
2518     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2519   }
2520   baij = (Mat_MPIBAIJ*) mat->data;
2521   baij->ht_fact = fact;
2522   PetscFunctionReturn(0);
2523 }
2524