xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision be0abb6ddea392ceaee217d3645fed7c6ea71822)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.174 1999/06/30 23:51:57 balay Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
48 
49 /*
50      Local utility routine that creates a mapping from the global column
51    number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
53    length of colmap equals the global matrix length.
54 */
55 #undef __FUNC__
56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* Sparse table variant: keys and values are shifted by +1 so that a
     lookup result of 0 can mean "column not present".
     NOTE(review): the table key is garray[i]+1 while the dense branch below
     indexes colmap[] by garray[i] directly -- confirm callers compensate
     for this asymmetry at lookup time. */
  ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: one int per global block column (length Nbs), zeroed so
     that 0 marks "absent"; entry garray[i] maps to local offset i*bs+1. */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
77 
78 #define CHUNKSIZE  10
79 
/*
   MatSetValues_SeqBAIJ_A_Private - Inlined insertion of one scalar entry at
   point-index (row,col) into the local diagonal block (the "A" matrix).

   Relies on free variables supplied by the enclosing function
   (MatSetValues_MPIBAIJ): a, aa, ai, aj, aimax, ailen, bs, bs2, ierr, and
   the scratch variables rp, ap, bap, brow, bcol, ridx, cidx, low, high, t,
   _i, ii, N, nrow, rmax.

   Steps: locate the block row/column; binary-search then linear-scan the
   row for an existing block; if found, add or overwrite the scalar slot.
   Otherwise (unless a->nonew forbids it) grow the row -- reallocating the
   whole matrix storage by CHUNKSIZE block entries when the row is full --
   shift later entries up, and insert a fresh zeroed block holding value.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                          len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
157 
/*
   MatSetValues_SeqBAIJ_B_Private - Inlined insertion of one scalar entry at
   point-index (row,col) into the local off-diagonal block (the "B" matrix).
   Identical in structure to MatSetValues_SeqBAIJ_A_Private but operates on
   the b/ba/bi/bj/bimax/bilen free variables supplied by the enclosing
   function (MatSetValues_MPIBAIJ) and uses the b_noinsert label.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
235 
236 #undef __FUNC__
237 #define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts or adds scalar (point-indexed) values.
   Locally owned rows are routed either into the diagonal block (A) or the
   off-diagonal block (B) via the inlined macros above; rows owned by other
   processes are stashed and communicated at assembly time.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  /* scratch variables consumed by MatSetValues_SeqBAIJ_{A,B}_Private */
  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;  /* negative row indices are silently skipped */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* this row is owned locally */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            /* after assembly B is compacted; translate the global column
               through colmap (entries are stored shifted by +1, so a
               result < 0 below means "column not currently present") */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* a brand-new off-diagonal nonzero: expand B back to global
                 numbering before inserting */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for communication unless the
         user promised never to set off-process values */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
318 
319 #undef __FUNC__
320 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
321 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
322 {
323   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
324   Scalar      *value,*barray=baij->barray;
325   int         ierr,i,j,ii,jj,row,col;
326   int         roworiented = baij->roworiented,rstart=baij->rstart ;
327   int         rend=baij->rend,cstart=baij->cstart,stepval;
328   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
329 
330   if(!barray) {
331     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
332   }
333 
334   if (roworiented) {
335     stepval = (n-1)*bs;
336   } else {
337     stepval = (m-1)*bs;
338   }
339   for ( i=0; i<m; i++ ) {
340     if (im[i] < 0) continue;
341 #if defined(PETSC_USE_BOPT_g)
342     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
343 #endif
344     if (im[i] >= rstart && im[i] < rend) {
345       row = im[i] - rstart;
346       for ( j=0; j<n; j++ ) {
347         /* If NumCol = 1 then a copy is not required */
348         if ((roworiented) && (n == 1)) {
349           barray = v + i*bs2;
350         } else if((!roworiented) && (m == 1)) {
351           barray = v + j*bs2;
352         } else { /* Here a copy is required */
353           if (roworiented) {
354             value = v + i*(stepval+bs)*bs + j*bs;
355           } else {
356             value = v + j*(stepval+bs)*bs + i*bs;
357           }
358           for ( ii=0; ii<bs; ii++,value+=stepval ) {
359             for (jj=0; jj<bs; jj++ ) {
360               *barray++  = *value++;
361             }
362           }
363           barray -=bs2;
364         }
365 
366         if (in[j] >= cstart && in[j] < cend){
367           col  = in[j] - cstart;
368           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
369         }
370         else if (in[j] < 0) continue;
371 #if defined(PETSC_USE_BOPT_g)
372         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
373 #endif
374         else {
375           if (mat->was_assembled) {
376             if (!baij->colmap) {
377               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
378             }
379 
380 #if defined(PETSC_USE_BOPT_g)
381 #if defined (PETSC_USE_CTABLE)
382             { int data;
383             ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
384             if((data - 1) % bs)
385 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
386             }
387 #else
388             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
389 #endif
390 #endif
391 #if defined (PETSC_USE_CTABLE)
392 	    ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
393             col  = (col - 1)/bs;
394 #else
395             col = (baij->colmap[in[j]] - 1)/bs;
396 #endif
397             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
398               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
399               col =  in[j];
400             }
401           }
402           else col = in[j];
403           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
404         }
405       }
406     } else {
407       if (!baij->donotstash) {
408         if (roworiented) {
409           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
410         } else {
411           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         }
413       }
414     }
415   }
416   PetscFunctionReturn(0);
417 }
/* Multiplicative hashing constant: the fractional part of the golden
   ratio (Knuth's recommended multiplier). */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* Maps key into [0,size): take the fractional part of key*HASH_KEY and
   scale by size.  tmp must be a double lvalue supplied by the caller; the
   macro assigns it as a side effect, so do not pass an expression there. */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
422 #undef __FUNC__
423 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues for the
   point-indexed case.  It can only modify entries that already exist in
   the hash table built by MatCreateHashTable_MPIBAIJ_Private(); inserting
   at a (row,col) with no table entry is an error.  Off-process rows are
   stashed as in the regular code path.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        /* key encodes the (block row, block column) pair, shifted by +1 so
           0 can mean "empty slot" in HT */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* debug build: identical probe loop but also counts probes for the
           search statistics reported elsewhere */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe from h1 to end of table, then wrap to the start */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-process row: stash unless disabled */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
498 
499 #undef __FUNC__
500 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of the blocked
   MatSetValues.  Row/column indices are in block units; only blocks that
   already have a hash-table entry can be modified.  HD[idx] points at the
   bs2 scalars of the stored block (column-major within the block, as shown
   by the jj+=bs stride in the row-oriented branch).
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance (in scalars) between consecutive rows of one input block */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        /* key encodes (block row, block column), shifted by +1 so 0 can
           mean "empty slot" in HT */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* debug build: same probe loop, plus probe counting */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe from h1 to end of table, then wrap to the start */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          /* NOTE(review): v_t advances by bs per column j, i.e. the input
             blocks for one block row are assumed consecutive -- confirm
             this matches the callers' layout */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-process block row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
612 
613 #undef __FUNC__
614 #define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves scalar (point-indexed) values.  Only
   rows owned by this process are supported; requesting an off-process row
   raises PETSC_ERR_SUP.  Columns in the diagonal range are read from A;
   other columns are translated through colmap and read from B, with 0.0
   returned for positions not stored in B.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lives in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate through colmap (entries are
             stored shifted by +1, so "data" < 0 means absent) */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* cross-check against garray: a stale/absent mapping yields 0.0 */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
656 
657 #undef __FUNC__
658 #define __FUNC__ "MatNorm_MPIBAIJ"
659 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
660 {
661   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
662   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
663   int        ierr, i,bs2=baij->bs2;
664   double     sum = 0.0;
665   Scalar     *v;
666 
667   PetscFunctionBegin;
668   if (baij->size == 1) {
669     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
670   } else {
671     if (type == NORM_FROBENIUS) {
672       v = amat->a;
673       for (i=0; i<amat->nz*bs2; i++ ) {
674 #if defined(PETSC_USE_COMPLEX)
675         sum += PetscReal(PetscConj(*v)*(*v)); v++;
676 #else
677         sum += (*v)*(*v); v++;
678 #endif
679       }
680       v = bmat->a;
681       for (i=0; i<bmat->nz*bs2; i++ ) {
682 #if defined(PETSC_USE_COMPLEX)
683         sum += PetscReal(PetscConj(*v)*(*v)); v++;
684 #else
685         sum += (*v)*(*v); v++;
686 #endif
687       }
688       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
689       *norm = sqrt(*norm);
690     } else {
691       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
692     }
693   }
694   PetscFunctionReturn(0);
695 }
696 
697 
698 /*
699   Creates the hash table, and sets the table
700   This table is created only once.
  If new entries need to be added to the matrix
702   then the hash table has to be destroyed and
703   recreated.
704 */
705 #undef __FUNC__
706 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
707 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
708 {
709   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
710   Mat         A = baij->A, B=baij->B;
711   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
712   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
713   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
714   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
715   int         *HT,key;
716   Scalar      **HD;
717   double      tmp;
718 #if defined(PETSC_USE_BOPT_g)
719   int         ct=0,max=0;
720 #endif
721 
722   PetscFunctionBegin;
723   baij->ht_size=(int)(factor*nz);
724   size = baij->ht_size;
725 
726   if (baij->ht) {
727     PetscFunctionReturn(0);
728   }
729 
730   /* Allocate Memory for Hash Table */
731   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
732   baij->ht = (int*)(baij->hd + size);
733   HD = baij->hd;
734   HT = baij->ht;
735 
736 
737   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
738 
739 
740   /* Loop Over A */
741   for ( i=0; i<a->mbs; i++ ) {
742     for ( j=ai[i]; j<ai[i+1]; j++ ) {
743       row = i+rstart;
744       col = aj[j]+cstart;
745 
746       key = row*Nbs + col + 1;
747       h1  = HASH(size,key,tmp);
748       for ( k=0; k<size; k++ ){
749         if (HT[(h1+k)%size] == 0.0) {
750           HT[(h1+k)%size] = key;
751           HD[(h1+k)%size] = a->a + j*bs2;
752           break;
753 #if defined(PETSC_USE_BOPT_g)
754         } else {
755           ct++;
756 #endif
757         }
758       }
759 #if defined(PETSC_USE_BOPT_g)
760       if (k> max) max = k;
761 #endif
762     }
763   }
764   /* Loop Over B */
765   for ( i=0; i<b->mbs; i++ ) {
766     for ( j=bi[i]; j<bi[i+1]; j++ ) {
767       row = i+rstart;
768       col = garray[bj[j]];
769       key = row*Nbs + col + 1;
770       h1  = HASH(size,key,tmp);
771       for ( k=0; k<size; k++ ){
772         if (HT[(h1+k)%size] == 0.0) {
773           HT[(h1+k)%size] = key;
774           HD[(h1+k)%size] = b->a + j*bs2;
775           break;
776 #if defined(PETSC_USE_BOPT_g)
777         } else {
778           ct++;
779 #endif
780         }
781       }
782 #if defined(PETSC_USE_BOPT_g)
783       if (k> max) max = k;
784 #endif
785     }
786   }
787 
788   /* Print Summary */
789 #if defined(PETSC_USE_BOPT_g)
790   for ( i=0,j=0; i<size; i++)
791     if (HT[i]) {j++;}
792   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
793            (j== 0)? 0.0:((double)(ct+j))/j,max);
794 #endif
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNC__
799 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
800 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
801 {
802   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
803   int         ierr,nstash,reallocs;
804   InsertMode  addv;
805 
806   PetscFunctionBegin;
807   if (baij->donotstash) {
808     PetscFunctionReturn(0);
809   }
810 
811   /* make sure all processors are either in INSERTMODE or ADDMODE */
812   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
813   if (addv == (ADD_VALUES|INSERT_VALUES)) {
814     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
815   }
816   mat->insertmode = addv; /* in case this processor had no cache */
817 
818   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
819   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
822   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
823   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
824   PetscFunctionReturn(0);
825 }
826 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
    Completes assembly: drains the point stash and then the block stash,
  applying every received entry locally, assembles the diagonal (A) and
  off-diagonal (B) sequential blocks, coordinates disassembly across
  processes, and finally installs the optional hash-table MatSetValues
  routines when MAT_USE_HASH_TABLE was requested.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point (scalar-entry) stash; each message holds rows/cols/vals */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* values are bs2-sized blocks, hence the i*bs2 offset into val */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    /* restore the orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags: zero iff some process disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* debug-build statistics for the hash-table MatSetValues path */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with MAT_USE_HASH_TABLE set: build the table and
     switch MatSetValues/MatSetValuesBlocked to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* free the MatGetRow work space; it is reallocated on demand */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
925 
926 /*
927 #undef __FUNC__
928 #define __FUNC__ "MatView_MPIBAIJ_Binary"
929 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
930 {
931   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
932   int          ierr;
933 
934   PetscFunctionBegin;
935   if (baij->size == 1) {
936     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
937   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
938   PetscFunctionReturn(0);
939 }
940 */
941 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
    Views the parallel matrix: the INFO formats print per-process summaries;
  otherwise the whole matrix is gathered onto process 0 (unscalable) and
  viewed there.  Collective on mat->comm.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* sequential phase so the per-process lines come out in rank order */
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* process 0 gets all M x N rows; the others contribute nothing */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      /* rvals = the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        /* insert the bs x bs block one column at a time (a is column major) */
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        /* garray maps B's compressed block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1046 
1047 
1048 
1049 #undef __FUNC__
1050 #define __FUNC__ "MatView_MPIBAIJ"
1051 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1052 {
1053   int         ierr;
1054   ViewerType  vtype;
1055 
1056   PetscFunctionBegin;
1057   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
1058   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1059       PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) {
1060     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1061     /*
1062   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1063     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1064     */
1065   } else {
1066     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
    Destroys the parallel matrix: releases the mappings, maps, stashes, the
  two sequential blocks, the column map, the scatter machinery, the various
  work arrays, and finally the header itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  /* rowners is the base of the ownership arrays allocation */
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is a Table or a plain array depending on the build configuration */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd is the hash-table storage (NOTE(review): ht appears to be freed with
     hd's allocation elsewhere -- confirm no leak of baij->ht here) */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ "MatMult_MPIBAIJ"
1121 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1122 {
1123   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1124   int         ierr, nt;
1125 
1126   PetscFunctionBegin;
1127   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1128   if (nt != a->n) {
1129     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1130   }
1131   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1132   if (nt != a->m) {
1133     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1134   }
1135   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1136   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1137   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1138   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1139   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNC__
1144 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1145 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1146 {
1147   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1148   int        ierr;
1149 
1150   PetscFunctionBegin;
1151   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1152   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1153   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1154   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 #undef __FUNC__
1159 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1160 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1161 {
1162   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1163   int         ierr;
1164 
1165   PetscFunctionBegin;
1166   /* do nondiagonal part */
1167   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1168   /* send it on its way */
1169   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1170   /* do local part */
1171   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1172   /* receive remote parts: note this assumes the values are not actually */
1173   /* inserted in yy until the next line, which is true for my implementation*/
1174   /* but is not perhaps always true. */
1175   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1176   PetscFunctionReturn(0);
1177 }
1178 
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
    zz = yy + A^T * xx, overlapping the reverse scatter of the off-diagonal
  contribution with the local transpose multiply-add.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part: zz = yy + A_diag^T * xx */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1199 
1200 /*
1201   This only works correctly for square matrices where the subblock A->A is the
1202    diagonal block
1203 */
1204 #undef __FUNC__
1205 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1206 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1207 {
1208   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1209   int         ierr;
1210 
1211   PetscFunctionBegin;
1212   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1213   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNC__
1218 #define __FUNC__ "MatScale_MPIBAIJ"
1219 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1220 {
1221   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1222   int         ierr;
1223 
1224   PetscFunctionBegin;
1225   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1226   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNC__
1231 #define __FUNC__ "MatGetSize_MPIBAIJ"
1232 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1233 {
1234   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1235 
1236   PetscFunctionBegin;
1237   if (m) *m = mat->M;
1238   if (n) *n = mat->N;
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNC__
1243 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1244 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1245 {
1246   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1247 
1248   PetscFunctionBegin;
1249   *m = mat->m; *n = mat->n;
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNC__
1254 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1255 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1256 {
1257   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1258 
1259   PetscFunctionBegin;
1260   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1261   PetscFunctionReturn(0);
1262 }
1263 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
    Returns one locally-owned global row as a merged, column-sorted view of
  the diagonal (A) and off-diagonal (B) sequential blocks.  The indices and
  values are copied into per-matrix work arrays (rowvalues/rowindices) that
  remain valid until MatRestoreRow_MPIBAIJ() is called; only one row may be
  active at a time.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then indices right behind them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  /* no semicolon needed: SETERRQ is a braced statement macro */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* only request from A/B the pieces (values/indices) the caller wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark marks where B's columns cross from left of A's range to right */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        /* reuse imark from the value pass when available; else recompute */
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1344 
1345 #undef __FUNC__
1346 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1347 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1348 {
1349   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1350 
1351   PetscFunctionBegin;
1352   if (baij->getrowactive == PETSC_FALSE) {
1353     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1354   }
1355   baij->getrowactive = PETSC_FALSE;
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNC__
1360 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1361 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1362 {
1363   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1364 
1365   PetscFunctionBegin;
1366   *bs = baij->bs;
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNC__
1371 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1372 int MatZeroEntries_MPIBAIJ(Mat A)
1373 {
1374   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1375   int         ierr;
1376 
1377   PetscFunctionBegin;
1378   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1379   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNC__
1384 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1385 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1386 {
1387   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1388   Mat         A = a->A, B = a->B;
1389   int         ierr;
1390   double      isend[5], irecv[5];
1391 
1392   PetscFunctionBegin;
1393   info->block_size     = (double)a->bs;
1394   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1395   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1396   isend[3] = info->memory;  isend[4] = info->mallocs;
1397   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1398   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1399   isend[3] += info->memory;  isend[4] += info->mallocs;
1400   if (flag == MAT_LOCAL) {
1401     info->nz_used      = isend[0];
1402     info->nz_allocated = isend[1];
1403     info->nz_unneeded  = isend[2];
1404     info->memory       = isend[3];
1405     info->mallocs      = isend[4];
1406   } else if (flag == MAT_GLOBAL_MAX) {
1407     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1408     info->nz_used      = irecv[0];
1409     info->nz_allocated = irecv[1];
1410     info->nz_unneeded  = irecv[2];
1411     info->memory       = irecv[3];
1412     info->mallocs      = irecv[4];
1413   } else if (flag == MAT_GLOBAL_SUM) {
1414     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1415     info->nz_used      = irecv[0];
1416     info->nz_allocated = irecv[1];
1417     info->nz_unneeded  = irecv[2];
1418     info->memory       = irecv[3];
1419     info->mallocs      = irecv[4];
1420   } else {
1421     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1422   }
1423   info->rows_global       = (double)a->M;
1424   info->columns_global    = (double)a->N;
1425   info->rows_local        = (double)a->m;
1426   info->columns_local     = (double)a->N;
1427   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1428   info->fill_ratio_needed = 0;
1429   info->factor_mallocs    = 0;
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNC__
1434 #define __FUNC__ "MatSetOption_MPIBAIJ"
1435 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1436 {
1437   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1438   int         ierr;
1439 
1440   PetscFunctionBegin;
1441   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1442       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1443       op == MAT_COLUMNS_UNSORTED ||
1444       op == MAT_COLUMNS_SORTED ||
1445       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1446       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1447         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1448         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1449   } else if (op == MAT_ROW_ORIENTED) {
1450         a->roworiented = 1;
1451         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1452         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1453   } else if (op == MAT_ROWS_SORTED ||
1454              op == MAT_ROWS_UNSORTED ||
1455              op == MAT_SYMMETRIC ||
1456              op == MAT_STRUCTURALLY_SYMMETRIC ||
1457              op == MAT_YES_NEW_DIAGONALS ||
1458              op == MAT_USE_HASH_TABLE) {
1459     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1460   } else if (op == MAT_COLUMN_ORIENTED) {
1461     a->roworiented = 0;
1462     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1463     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1464   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1465     a->donotstash = 1;
1466   } else if (op == MAT_NO_NEW_DIAGONALS) {
1467     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1468   } else if (op == MAT_USE_HASH_TABLE) {
1469     a->ht_flag = 1;
1470   } else {
1471     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNC__
1477 #define __FUNC__ "MatTranspose_MPIBAIJ("
1478 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1479 {
1480   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1481   Mat_SeqBAIJ *Aloc;
1482   Mat        B;
1483   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1484   int        bs=baij->bs,mbs=baij->mbs;
1485   Scalar     *a;
1486 
1487   PetscFunctionBegin;
1488   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1489   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1490 CHKERRQ(ierr);
1491 
1492   /* copy over the A part */
1493   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1494   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1495   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1496 
1497   for ( i=0; i<mbs; i++ ) {
1498     rvals[0] = bs*(baij->rstart + i);
1499     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1500     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1501       col = (baij->cstart+aj[j])*bs;
1502       for (k=0; k<bs; k++ ) {
1503         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1504         col++; a += bs;
1505       }
1506     }
1507   }
1508   /* copy over the B part */
1509   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1510   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1511   for ( i=0; i<mbs; i++ ) {
1512     rvals[0] = bs*(baij->rstart + i);
1513     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1514     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1515       col = baij->garray[aj[j]]*bs;
1516       for (k=0; k<bs; k++ ) {
1517         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1518         col++; a += bs;
1519       }
1520     }
1521   }
1522   ierr = PetscFree(rvals);CHKERRQ(ierr);
1523   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1524   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1525 
1526   if (matout != PETSC_NULL) {
1527     *matout = B;
1528   } else {
1529     PetscOps *Abops;
1530     MatOps   Aops;
1531 
1532     /* This isn't really an in-place transpose .... but free data structures from baij */
1533     ierr = PetscFree(baij->rowners); CHKERRQ(ierr);
1534     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1535     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1536 #if defined (PETSC_USE_CTABLE)
1537     if (baij->colmap) {ierr = TableDelete(baij->colmap);CHKERRQ(ierr);}
1538 #else
1539     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1540 #endif
1541     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1542     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1543     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1544     ierr = PetscFree(baij);CHKERRQ(ierr);
1545 
1546     /*
1547        This is horrible, horrible code. We need to keep the
1548       A pointers for the bops and ops but copy everything
1549       else from C.
1550     */
1551     Abops   = A->bops;
1552     Aops    = A->ops;
1553     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1554     A->bops = Abops;
1555     A->ops  = Aops;
1556 
1557     PetscHeaderDestroy(B);
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNC__
1563 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1564 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1565 {
1566   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1567   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1568   int ierr,s1,s2,s3;
1569 
1570   PetscFunctionBegin;
1571   if (ll)  {
1572     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1573     ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
1574     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1575     ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr);
1576     ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr);
1577   }
1578   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNC__
1583 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1584 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1585 {
1586   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1587   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1588   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1589   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1590   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1591   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1592   MPI_Comm       comm = A->comm;
1593   MPI_Request    *send_waits,*recv_waits;
1594   MPI_Status     recv_status,*send_status;
1595   IS             istmp;
1596 
1597   PetscFunctionBegin;
1598   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1599   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1600 
1601   /*  first count number of contributors to each processor */
1602   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1603   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1604  procs   = nprocs + size;
1605   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1606   for ( i=0; i<N; i++ ) {
1607     idx = rows[i];
1608     found = 0;
1609     for ( j=0; j<size; j++ ) {
1610       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1611         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1612       }
1613     }
1614     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1615   }
1616   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1617 
1618   /* inform other processors of number of messages and max length*/
1619   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
1620   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1621   nrecvs = work[rank];
1622   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1623   nmax   = work[rank];
1624   ierr = PetscFree(work);CHKERRQ(ierr);
1625 
1626   /* post receives:   */
1627   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1628   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1629   for ( i=0; i<nrecvs; i++ ) {
1630     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1631   }
1632 
1633   /* do sends:
1634      1) starts[i] gives the starting index in svalues for stuff going to
1635      the ith processor
1636   */
1637   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1638   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1639   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
1640   starts[0] = 0;
1641   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1642   for ( i=0; i<N; i++ ) {
1643     svalues[starts[owner[i]]++] = rows[i];
1644   }
1645   ISRestoreIndices(is,&rows);
1646 
1647   starts[0] = 0;
1648   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1649   count = 0;
1650   for ( i=0; i<size; i++ ) {
1651     if (procs[i]) {
1652       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1653     }
1654   }
1655   ierr = PetscFree(starts);CHKERRQ(ierr);
1656 
1657   base = owners[rank]*bs;
1658 
1659   /*  wait on receives */
1660   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
1661   source = lens + nrecvs;
1662   count  = nrecvs; slen = 0;
1663   while (count) {
1664     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1665     /* unpack receives into our local space */
1666     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1667     source[imdex]  = recv_status.MPI_SOURCE;
1668     lens[imdex]  = n;
1669     slen += n;
1670     count--;
1671   }
1672   ierr = PetscFree(recv_waits); CHKERRQ(ierr);
1673 
1674   /* move the data into the send scatter */
1675   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
1676   count = 0;
1677   for ( i=0; i<nrecvs; i++ ) {
1678     values = rvalues + i*nmax;
1679     for ( j=0; j<lens[i]; j++ ) {
1680       lrows[count++] = values[j] - base;
1681     }
1682   }
1683   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1684   ierr = PetscFree(lens);CHKERRQ(ierr);
1685   ierr = PetscFree(owner);CHKERRQ(ierr);
1686   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1687 
1688   /* actually zap the local rows */
1689   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1690   PLogObjectParent(A,istmp);
1691 
1692   /*
1693         Zero the required rows. If the "diagonal block" of the matrix
1694      is square and the user wishes to set the diagonal we use seperate
1695      code so that MatSetValues() is not called for each diagonal allocating
1696      new memory, thus calling lots of mallocs and slowing things down.
1697 
1698        Contributed by: Mathew Knepley
1699   */
1700   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1701   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
1702   if (diag && (l->A->M == l->A->N)) {
1703     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
1704   } else if (diag) {
1705     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1706     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1707       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1708 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1709     }
1710     for ( i = 0; i < slen; i++ ) {
1711       row  = lrows[i] + rstart_bs;
1712       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1713     }
1714     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1715     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1716   } else {
1717     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1718   }
1719 
1720   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1721   ierr = PetscFree(lrows);CHKERRQ(ierr);
1722 
1723   /* wait on sends */
1724   if (nsends) {
1725     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1726     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1727     ierr = PetscFree(send_status);CHKERRQ(ierr);
1728   }
1729   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1730   ierr = PetscFree(svalues);CHKERRQ(ierr);
1731 
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNC__
1736 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1737 int MatPrintHelp_MPIBAIJ(Mat A)
1738 {
1739   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1740   MPI_Comm    comm = A->comm;
1741   static int  called = 0;
1742   int         ierr;
1743 
1744   PetscFunctionBegin;
1745   if (!a->rank) {
1746     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1747   }
1748   if (called) {PetscFunctionReturn(0);} else called = 1;
1749   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1750   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNC__
1755 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1756 int MatSetUnfactored_MPIBAIJ(Mat A)
1757 {
1758   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1759   int         ierr;
1760 
1761   PetscFunctionBegin;
1762   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1763   PetscFunctionReturn(0);
1764 }
1765 
1766 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1767 
1768 #undef __FUNC__
1769 #define __FUNC__ "MatEqual_MPIBAIJ"
1770 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1771 {
1772   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1773   Mat         a, b, c, d;
1774   PetscTruth  flg;
1775   int         ierr;
1776 
1777   PetscFunctionBegin;
1778   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1779   a = matA->A; b = matA->B;
1780   c = matB->A; d = matB->B;
1781 
1782   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1783   if (flg == PETSC_TRUE) {
1784     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1785   }
1786   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 /* -------------------------------------------------------------------*/
/*
   Virtual function table for the MPIBAIJ matrix type.  Each slot supplies
   the implementation of one Mat operation; a 0 marks an operation this
   format does not implement.  The slot order is fixed by struct _MatOps
   (declared elsewhere) -- do not reorder entries.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1858 
1859 
1860 EXTERN_C_BEGIN
1861 #undef __FUNC__
1862 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1863 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1864 {
1865   PetscFunctionBegin;
1866   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1867   *iscopy = PETSC_FALSE;
1868   PetscFunctionReturn(0);
1869 }
1870 EXTERN_C_END
1871 
1872 #undef __FUNC__
1873 #define __FUNC__ "MatCreateMPIBAIJ"
1874 /*@C
1875    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1876    (block compressed row).  For good matrix assembly performance
1877    the user should preallocate the matrix storage by setting the parameters
1878    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1879    performance can be increased by more than a factor of 50.
1880 
1881    Collective on MPI_Comm
1882 
1883    Input Parameters:
1884 +  comm - MPI communicator
1885 .  bs   - size of block
1886 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1887            This value should be the same as the local size used in creating the
1888            y vector for the matrix-vector product y = Ax.
1889 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1890            This value should be the same as the local size used in creating the
1891            x vector for the matrix-vector product y = Ax.
1892 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1893 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1894 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1895            submatrix  (same for all local rows)
1896 .  d_nnz - array containing the number of block nonzeros in the various block rows
1897            of the in diagonal portion of the local (possibly different for each block
1898            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1899 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1900            submatrix (same for all local rows).
1901 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1902            off-diagonal portion of the local submatrix (possibly different for
1903            each block row) or PETSC_NULL.
1904 
1905    Output Parameter:
1906 .  A - the matrix
1907 
1908    Options Database Keys:
1909 .   -mat_no_unroll - uses code that does not unroll the loops in the
1910                      block calculations (much slower)
1911 .   -mat_block_size - size of the blocks to use
1912 .   -mat_mpi - use the parallel matrix data structures even on one processor
1913                (defaults to using SeqBAIJ format on one processor)
1914 
1915    Notes:
1916    The user MUST specify either the local or global matrix dimensions
1917    (possibly both).
1918 
1919    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1920    then it must be used on all processors that share the object for that argument.
1921 
1922    Storage Information:
1923    For a square global matrix we define each processor's diagonal portion
1924    to be its local rows and the corresponding columns (a square submatrix);
1925    each processor's off-diagonal portion encompasses the remainder of the
1926    local matrix (a rectangular submatrix).
1927 
1928    The user can specify preallocated storage for the diagonal part of
1929    the local submatrix with either d_nz or d_nnz (not both).  Set
1930    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1931    memory allocation.  Likewise, specify preallocated storage for the
1932    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1933 
1934    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1935    the figure below we depict these three local rows and all columns (0-11).
1936 
1937 .vb
1938            0 1 2 3 4 5 6 7 8 9 10 11
1939           -------------------
1940    row 3  |  o o o d d d o o o o o o
1941    row 4  |  o o o d d d o o o o o o
1942    row 5  |  o o o d d d o o o o o o
1943           -------------------
1944 .ve
1945 
1946    Thus, any entries in the d locations are stored in the d (diagonal)
1947    submatrix, and any entries in the o locations are stored in the
1948    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1949    stored simply in the MATSEQBAIJ format for compressed row storage.
1950 
1951    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1952    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1953    In general, for PDE problems in which most nonzeros are near the diagonal,
1954    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1955    or you will get TERRIBLE performance; see the users' manual chapter on
1956    matrices.
1957 
1958    Level: intermediate
1959 
1960 .keywords: matrix, block, aij, compressed row, sparse, parallel
1961 
1962 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1963 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  /* a -mat_block_size option overrides the bs argument */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* on a single process fall back to the SeqBAIJ format, unless the user
     explicitly asked for the parallel data structures via -mat_mpibaij
     or -mat_mpi */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  /* install the MPIBAIJ virtual function table */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* sanity checks: per-row preallocation arrays require a known local row
     count, and at least one of each local/global dimension must be given */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are supplied, the local ones win and
     the global sizes are recomputed from them below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* global sizes: sum of the local sizes across the communicator */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* local sizes: split the block rows/cols as evenly as possible, giving
     the low-rank processes one extra block when the split is uneven */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* record sizes both in point units (m,n,M,N) and block units (*bs fields) */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation backs three prefix-sum tables: rowners/cowners in block
     units and rowners_bs in point units */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the two sequential blocks: A is m x n (diagonal part), B is
     m x N (off-diagonal part, using global column numbering for now) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional hash-table assembly; factors <= 1.0 fall back to the default */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the format-specific operations that are dispatched by name */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2141 
2142 #undef __FUNC__
2143 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2144 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2145 {
2146   Mat         mat;
2147   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2148   int         ierr, len=0, flg;
2149 
2150   PetscFunctionBegin;
2151   *newmat       = 0;
2152   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2153   PLogObjectCreate(mat);
2154   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2155   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2156   mat->ops->destroy = MatDestroy_MPIBAIJ;
2157   mat->ops->view    = MatView_MPIBAIJ;
2158   mat->factor       = matin->factor;
2159   mat->assembled    = PETSC_TRUE;
2160   mat->insertmode   = NOT_SET_VALUES;
2161 
2162   a->m = mat->m   = oldmat->m;
2163   a->n = mat->n   = oldmat->n;
2164   a->M = mat->M   = oldmat->M;
2165   a->N = mat->N   = oldmat->N;
2166 
2167   a->bs  = oldmat->bs;
2168   a->bs2 = oldmat->bs2;
2169   a->mbs = oldmat->mbs;
2170   a->nbs = oldmat->nbs;
2171   a->Mbs = oldmat->Mbs;
2172   a->Nbs = oldmat->Nbs;
2173 
2174   a->rstart       = oldmat->rstart;
2175   a->rend         = oldmat->rend;
2176   a->cstart       = oldmat->cstart;
2177   a->cend         = oldmat->cend;
2178   a->size         = oldmat->size;
2179   a->rank         = oldmat->rank;
2180   a->donotstash   = oldmat->donotstash;
2181   a->roworiented  = oldmat->roworiented;
2182   a->rowindices   = 0;
2183   a->rowvalues    = 0;
2184   a->getrowactive = PETSC_FALSE;
2185   a->barray       = 0;
2186   a->rstart_bs    = oldmat->rstart_bs;
2187   a->rend_bs      = oldmat->rend_bs;
2188   a->cstart_bs    = oldmat->cstart_bs;
2189   a->cend_bs      = oldmat->cend_bs;
2190 
2191   /* hash table stuff */
2192   a->ht           = 0;
2193   a->hd           = 0;
2194   a->ht_size      = 0;
2195   a->ht_flag      = oldmat->ht_flag;
2196   a->ht_fact      = oldmat->ht_fact;
2197   a->ht_total_ct  = 0;
2198   a->ht_insert_ct = 0;
2199 
2200 
2201   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2202   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2203   a->cowners    = a->rowners + a->size + 2;
2204   a->rowners_bs = a->cowners + a->size + 2;
2205   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2206   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2207   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2208   if (oldmat->colmap) {
2209 #if defined (PETSC_USE_CTABLE)
2210   ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2211 #else
2212     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2213     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2214     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2215 #endif
2216   } else a->colmap = 0;
2217   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2218     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2219     PLogObjectMemory(mat,len*sizeof(int));
2220     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2221   } else a->garray = 0;
2222 
2223   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2224   PLogObjectParent(mat,a->lvec);
2225   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2226 
2227   PLogObjectParent(mat,a->Mvctx);
2228   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2229   PLogObjectParent(mat,a->A);
2230   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2231   PLogObjectParent(mat,a->B);
2232   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2233   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2234   if (flg) {
2235     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2236   }
2237   *newmat = mat;
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 #include "sys.h"
2242 
2243 #undef __FUNC__
2244 #define __FUNC__ "MatLoad_MPIBAIJ"
2245 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2246 {
2247   Mat          A;
2248   int          i, nz, ierr, j,rstart, rend, fd;
2249   Scalar       *vals,*buf;
2250   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2251   MPI_Status   status;
2252   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2253   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2254   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2255   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2256   int          dcount,kmax,k,nzcount,tmp;
2257 
2258   PetscFunctionBegin;
2259   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2260 
2261   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2262   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2263   if (!rank) {
2264     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2265     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2266     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2267     if (header[3] < 0) {
2268       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2269     }
2270   }
2271 
2272   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2273   M = header[1]; N = header[2];
2274 
2275   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2276 
2277   /*
2278      This code adds extra rows to make sure the number of rows is
2279      divisible by the blocksize
2280   */
2281   Mbs        = M/bs;
2282   extra_rows = bs - M + bs*(Mbs);
2283   if (extra_rows == bs) extra_rows = 0;
2284   else                  Mbs++;
2285   if (extra_rows &&!rank) {
2286     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2287   }
2288 
2289   /* determine ownership of all rows */
2290   mbs = Mbs/size + ((Mbs % size) > rank);
2291   m   = mbs * bs;
2292   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2293   browners = rowners + size + 1;
2294   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2295   rowners[0] = 0;
2296   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2297   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2298   rstart = rowners[rank];
2299   rend   = rowners[rank+1];
2300 
2301   /* distribute row lengths to all processors */
2302   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2303   if (!rank) {
2304     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2305     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2306     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2307     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2308     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2309     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2310     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2311   } else {
2312     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2313   }
2314 
2315   if (!rank) {
2316     /* calculate the number of nonzeros on each processor */
2317     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2318     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2319     for ( i=0; i<size; i++ ) {
2320       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2321         procsnz[i] += rowlengths[j];
2322       }
2323     }
2324     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2325 
2326     /* determine max buffer needed and allocate it */
2327     maxnz = 0;
2328     for ( i=0; i<size; i++ ) {
2329       maxnz = PetscMax(maxnz,procsnz[i]);
2330     }
2331     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2332 
2333     /* read in my part of the matrix column indices  */
2334     nz = procsnz[0];
2335     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2336     mycols = ibuf;
2337     if (size == 1)  nz -= extra_rows;
2338     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2339     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2340 
2341     /* read in every ones (except the last) and ship off */
2342     for ( i=1; i<size-1; i++ ) {
2343       nz   = procsnz[i];
2344       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2345       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2346     }
2347     /* read in the stuff for the last proc */
2348     if ( size != 1 ) {
2349       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2350       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2351       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2352       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2353     }
2354     ierr = PetscFree(cols);CHKERRQ(ierr);
2355   } else {
2356     /* determine buffer space needed for message */
2357     nz = 0;
2358     for ( i=0; i<m; i++ ) {
2359       nz += locrowlens[i];
2360     }
2361     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2362     mycols = ibuf;
2363     /* receive message of column indices*/
2364     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2365     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2366     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2367   }
2368 
2369   /* loop over local rows, determining number of off diagonal entries */
2370   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2371   odlens = dlens + (rend-rstart);
2372   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2373   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2374   masked1 = mask    + Mbs;
2375   masked2 = masked1 + Mbs;
2376   rowcount = 0; nzcount = 0;
2377   for ( i=0; i<mbs; i++ ) {
2378     dcount  = 0;
2379     odcount = 0;
2380     for ( j=0; j<bs; j++ ) {
2381       kmax = locrowlens[rowcount];
2382       for ( k=0; k<kmax; k++ ) {
2383         tmp = mycols[nzcount++]/bs;
2384         if (!mask[tmp]) {
2385           mask[tmp] = 1;
2386           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2387           else masked1[dcount++] = tmp;
2388         }
2389       }
2390       rowcount++;
2391     }
2392 
2393     dlens[i]  = dcount;
2394     odlens[i] = odcount;
2395 
2396     /* zero out the mask elements we set */
2397     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2398     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2399   }
2400 
2401   /* create our matrix */
2402   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2403   A = *newmat;
2404   MatSetOption(A,MAT_COLUMNS_SORTED);
2405 
2406   if (!rank) {
2407     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2408     /* read in my part of the matrix numerical values  */
2409     nz = procsnz[0];
2410     vals = buf;
2411     mycols = ibuf;
2412     if (size == 1)  nz -= extra_rows;
2413     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2414     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2415 
2416     /* insert into matrix */
2417     jj      = rstart*bs;
2418     for ( i=0; i<m; i++ ) {
2419       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2420       mycols += locrowlens[i];
2421       vals   += locrowlens[i];
2422       jj++;
2423     }
2424     /* read in other processors (except the last one) and ship out */
2425     for ( i=1; i<size-1; i++ ) {
2426       nz   = procsnz[i];
2427       vals = buf;
2428       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2429       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2430     }
2431     /* the last proc */
2432     if ( size != 1 ){
2433       nz   = procsnz[i] - extra_rows;
2434       vals = buf;
2435       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2436       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2437       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2438     }
2439     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2440   } else {
2441     /* receive numeric values */
2442     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2443 
2444     /* receive message of values*/
2445     vals   = buf;
2446     mycols = ibuf;
2447     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2448     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2449     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2450 
2451     /* insert into matrix */
2452     jj      = rstart*bs;
2453     for ( i=0; i<m; i++ ) {
2454       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2455       mycols += locrowlens[i];
2456       vals   += locrowlens[i];
2457       jj++;
2458     }
2459   }
2460   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2461   ierr = PetscFree(buf);CHKERRQ(ierr);
2462   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2463   ierr = PetscFree(rowners);CHKERRQ(ierr);
2464   ierr = PetscFree(dlens);CHKERRQ(ierr);
2465   ierr = PetscFree(mask);CHKERRQ(ierr);
2466   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2467   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 
2472 
2473 #undef __FUNC__
2474 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2475 /*@
2476    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2477 
2478    Input Parameters:
2479 .  mat  - the matrix
2480 .  fact - factor
2481 
2482    Collective on Mat
2483 
2484    Level: advanced
2485 
2486   Notes:
2487    This can also be set by the command line option: -mat_use_hash_table fact
2488 
2489 .keywords: matrix, hashtable, factor, HT
2490 
2491 .seealso: MatSetOption()
2492 @*/
2493 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2494 {
2495   Mat_MPIBAIJ *baij;
2496 
2497   PetscFunctionBegin;
2498   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2499   if (mat->type != MATMPIBAIJ) {
2500       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2501   }
2502   baij = (Mat_MPIBAIJ*) mat->data;
2503   baij->ht_fact = fact;
2504   PetscFunctionReturn(0);
2505 }
2506