xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 65f2ba5b4a48bc2ffe646adabb0c3d6c70b2f427)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.164 1999/03/18 00:35:48 balay Exp bsmith $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs;
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* Hash-table variant: map global block column garray[i]+1 to the local
     point column i*bs+1.  Both key and value are shifted by one so a stored
     value of zero can only mean "not present locally". */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant: colmap[global block column] = local point column + 1;
     a zero entry means that global block column has no local storage. */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
50 
/* Number of extra block entries grabbed each time a row runs out of space */
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - inlined insertion of
   one point entry (row,col are point indices local to the diagonal block
   matrix baij->A) for use inside MatSetValues_MPIBAIJ() only.

   Expects the following locals to be in scope in the caller:
     a,A,aj,ai,aa,aimax,ailen,bs,bs2 and the scratch variables
     rp,ap,bap,brow,bcol,ridx,cidx,low,high,t,_i,ii,nrow,rmax,N.

   Algorithm: locate the block row, binary-then-linear search the sorted
   block-column list, update in place if the block exists; otherwise
   (unless a->nonew forbids it) grow the row -- reallocating the whole
   matrix storage by CHUNKSIZE blocks if the row is full -- and shift later
   entries up to make room.

   NOTE(review): a->nz is incremented only on the reallocation path, not for
   every newly inserted block -- confirm against the SeqBAIJ bookkeeping.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
127 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - identical to the _A_
   macro above but operating on the off-diagonal block matrix baij->B
   (locals b,B,bj,bi,ba,bimax,bilen), with its own b_noinsert label.
   Kept as a separate textual copy because the macro rebinds a different set
   of caller locals.

   NOTE(review): b->nz is incremented only on the reallocation path, not for
   every newly inserted block -- confirm against the SeqBAIJ bookkeeping.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
202 
203 #undef __FUNC__
204 #define __FUNC__ "MatSetValues_MPIBAIJ"
205 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
206 {
207   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
208   Scalar      value;
209   int         ierr,i,j,row,col;
210   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
211   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
212   int         cend_orig=baij->cend_bs,bs=baij->bs;
213 
214   /* Some Variables required in the macro */
215   Mat         A = baij->A;
216   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
217   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
218   Scalar      *aa=a->a;
219 
220   Mat         B = baij->B;
221   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
222   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
223   Scalar      *ba=b->a;
224 
225   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
226   int         low,high,t,ridx,cidx,bs2=a->bs2;
227   Scalar      *ap,*bap;
228 
229   PetscFunctionBegin;
230   for ( i=0; i<m; i++ ) {
231     if (im[i] < 0) continue;
232 #if defined(USE_PETSC_BOPT_g)
233     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
234 #endif
235     if (im[i] >= rstart_orig && im[i] < rend_orig) {
236       row = im[i] - rstart_orig;
237       for ( j=0; j<n; j++ ) {
238         if (in[j] >= cstart_orig && in[j] < cend_orig){
239           col = in[j] - cstart_orig;
240           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
241           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
242           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
243         } else if (in[j] < 0) continue;
244 #if defined(USE_PETSC_BOPT_g)
245         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
246 #endif
247         else {
248           if (mat->was_assembled) {
249             if (!baij->colmap) {
250               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
251             }
252 #if defined (USE_CTABLE)
253             ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
254             col  = col - 1 + in[j]%bs;
255 #else
256             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
257 #endif
258             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
259               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
260               col =  in[j];
261               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
262               B = baij->B;
263               b = (Mat_SeqBAIJ *) (B)->data;
264               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
265               ba=b->a;
266             }
267           } else col = in[j];
268           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
269           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
270           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
271         }
272       }
273     } else {
274       if ( !baij->donotstash) {
275         if (roworiented) {
276           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
277         } else {
278           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
279         }
280       }
281     }
282   }
283   PetscFunctionReturn(0);
284 }
285 
286 #undef __FUNC__
287 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
288 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
289 {
290   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
291   Scalar      *value,*barray=baij->barray;
292   int         ierr,i,j,ii,jj,row,col;
293   int         roworiented = baij->roworiented,rstart=baij->rstart ;
294   int         rend=baij->rend,cstart=baij->cstart,stepval;
295   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
296 
297   if(!barray) {
298     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
299   }
300 
301   if (roworiented) {
302     stepval = (n-1)*bs;
303   } else {
304     stepval = (m-1)*bs;
305   }
306   for ( i=0; i<m; i++ ) {
307     if (im[i] < 0) continue;
308 #if defined(USE_PETSC_BOPT_g)
309     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
310 #endif
311     if (im[i] >= rstart && im[i] < rend) {
312       row = im[i] - rstart;
313       for ( j=0; j<n; j++ ) {
314         /* If NumCol = 1 then a copy is not required */
315         if ((roworiented) && (n == 1)) {
316           barray = v + i*bs2;
317         } else if((!roworiented) && (m == 1)) {
318           barray = v + j*bs2;
319         } else { /* Here a copy is required */
320           if (roworiented) {
321             value = v + i*(stepval+bs)*bs + j*bs;
322           } else {
323             value = v + j*(stepval+bs)*bs + i*bs;
324           }
325           for ( ii=0; ii<bs; ii++,value+=stepval ) {
326             for (jj=0; jj<bs; jj++ ) {
327               *barray++  = *value++;
328             }
329           }
330           barray -=bs2;
331         }
332 
333         if (in[j] >= cstart && in[j] < cend){
334           col  = in[j] - cstart;
335           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
336         }
337         else if (in[j] < 0) continue;
338 #if defined(USE_PETSC_BOPT_g)
339         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
340 #endif
341         else {
342           if (mat->was_assembled) {
343             if (!baij->colmap) {
344               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
345             }
346 
347 #if defined(USE_PETSC_BOPT_g)
348 #if defined (USE_CTABLE)
349             { int data;
350             ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
351             if((data - 1) % bs)
352 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
353             }
354 #else
355             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
356 #endif
357 #endif
358 #if defined (USE_CTABLE)
359 	    ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
360             col  = (col - 1)/bs;
361 #else
362             col = (baij->colmap[in[j]] - 1)/bs;
363 #endif
364             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
365               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
366               col =  in[j];
367             }
368           }
369           else col = in[j];
370           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
371         }
372       }
373     } else {
374       if (!baij->donotstash) {
375         if (roworiented) {
376           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
377         } else {
378           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
379         }
380       }
381     }
382   }
383   PetscFunctionReturn(0);
384 }
/* Multiplicative-hashing constant: the fractional part of the golden ratio */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* Maps an integer key into [0,size); tmp is a caller-supplied double used as
   scratch so fmod() need not be called */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - point-indexed MatSetValues using the hash table
   built by MatCreateHashTable_MPIBAIJ_Private().  Every (block row, block
   column) touched must already be present in the table; it is an error to
   introduce a new nonzero block here.  Off-processor rows are stashed.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  /* search statistics kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes (block row, block col)
           shifted by one so that a stored key of 0 means "empty slot" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probe from h1 to the end of the table, then wrap to the
           front; same loop as the non-debug branch below plus counters */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-processor row: stash for exchange during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
465 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - block-indexed MatSetValuesBlocked using
   the hash table built by MatCreateHashTable_MPIBAIJ_Private().  Every
   (block row, block column) touched must already be in the table; new
   nonzero blocks cannot be introduced here.  Off-processor block rows are
   stashed for communication during assembly.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  /* search statistics kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride between consecutive rows of one input block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table: key encodes (block row, block col)
           shifted by one so that a stored key of 0 means "empty slot" */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probe from h1 to the end of the table, then wrap to the
           front; same loop as the non-debug branch below plus counters */
       if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* inner loop strides by bs: the row-oriented input block is stored
             transposed into the (column-major) bs2 block at baij_a */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          /* column-oriented input copies straight through */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-processor block row: stash for exchange during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
579 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - retrieves point-indexed values into v (row-major,
   v[i*n+j]).  Only rows owned by this processor may be queried; columns
   outside the local off-diagonal structure yield 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column falls in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap stores local point column + 1; 0 (hence data == -1 after
             the decrement) means the block column has no local storage */
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* cross-check against garray in case of a stale/collided entry */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
623 
624 #undef __FUNC__
625 #define __FUNC__ "MatNorm_MPIBAIJ"
626 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
627 {
628   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
629   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
630   int        ierr, i,bs2=baij->bs2;
631   double     sum = 0.0;
632   Scalar     *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       for (i=0; i<amat->nz*bs2; i++ ) {
641 #if defined(USE_PETSC_COMPLEX)
642         sum += PetscReal(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       for (i=0; i<bmat->nz*bs2; i++ ) {
649 #if defined(USE_PETSC_COMPLEX)
650         sum += PetscReal(PetscConj(*v)*(*v)); v++;
651 #else
652         sum += (*v)*(*v); v++;
653 #endif
654       }
655       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
656       *norm = sqrt(*norm);
657     } else {
658       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
659     }
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 
665 /*
666   Creates the hash table, and sets the table
667   This table is created only once.
668   If new entried need to be added to the matrix
669   then the hash table has to be destroyed and
670   recreated.
671 */
672 #undef __FUNC__
673 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
674 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
675 {
676   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
677   Mat         A = baij->A, B=baij->B;
678   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
679   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
680   int         size,bs2=baij->bs2,rstart=baij->rstart;
681   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
682   int         *HT,key;
683   Scalar      **HD;
684   double      tmp;
685 #if defined(USE_PETSC_BOPT_g)
686   int         ct=0,max=0;
687 #endif
688 
689   PetscFunctionBegin;
690   baij->ht_size=(int)(factor*nz);
691   size = baij->ht_size;
692 
693   if (baij->ht) {
694     PetscFunctionReturn(0);
695   }
696 
697   /* Allocate Memory for Hash Table */
698   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
699   baij->ht = (int*)(baij->hd + size);
700   HD = baij->hd;
701   HT = baij->ht;
702 
703 
704   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
705 
706 
707   /* Loop Over A */
708   for ( i=0; i<a->mbs; i++ ) {
709     for ( j=ai[i]; j<ai[i+1]; j++ ) {
710       row = i+rstart;
711       col = aj[j]+cstart;
712 
713       key = row*Nbs + col + 1;
714       h1  = HASH(size,key,tmp);
715       for ( k=0; k<size; k++ ){
716         if (HT[(h1+k)%size] == 0.0) {
717           HT[(h1+k)%size] = key;
718           HD[(h1+k)%size] = a->a + j*bs2;
719           break;
720 #if defined(USE_PETSC_BOPT_g)
721         } else {
722           ct++;
723 #endif
724         }
725       }
726 #if defined(USE_PETSC_BOPT_g)
727       if (k> max) max = k;
728 #endif
729     }
730   }
731   /* Loop Over B */
732   for ( i=0; i<b->mbs; i++ ) {
733     for ( j=bi[i]; j<bi[i+1]; j++ ) {
734       row = i+rstart;
735       col = garray[bj[j]];
736       key = row*Nbs + col + 1;
737       h1  = HASH(size,key,tmp);
738       for ( k=0; k<size; k++ ){
739         if (HT[(h1+k)%size] == 0.0) {
740           HT[(h1+k)%size] = key;
741           HD[(h1+k)%size] = b->a + j*bs2;
742           break;
743 #if defined(USE_PETSC_BOPT_g)
744         } else {
745           ct++;
746 #endif
747         }
748       }
749 #if defined(USE_PETSC_BOPT_g)
750       if (k> max) max = k;
751 #endif
752     }
753   }
754 
755   /* Print Summary */
756 #if defined(USE_PETSC_BOPT_g)
757   for ( i=0,j=0; i<size; i++)
758     if (HT[i]) {j++;}
759   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
760            (j== 0)? 0.0:((double)(ct+j))/j,max);
761 #endif
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNC__
766 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
767 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
768 {
769   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
770   int         ierr,nstash,reallocs;
771   InsertMode  addv;
772 
773   PetscFunctionBegin;
774   if (baij->donotstash) {
775     PetscFunctionReturn(0);
776   }
777 
778   /* make sure all processors are either in INSERTMODE or ADDMODE */
779   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
780   if (addv == (ADD_VALUES|INSERT_VALUES)) {
781     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
782   }
783   mat->insertmode = addv; /* in case this processor had no cache */
784 
785   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs); CHKERRQ(ierr);
786   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners); CHKERRQ(ierr);
787   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
788   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
789   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
790   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
791   PetscFunctionReturn(0);
792 }
793 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes the assembly started by MatAssemblyBegin().

   Drains two communication stashes (the point stash and the block stash) of
   values destined for rows owned by this process, applies them via the usual
   MatSetValues paths, then assembles the local diagonal (A) and off-diagonal
   (B) sequential matrices. Finally, sets up the parallel multiply scatter on
   the first assembly and (optionally) switches to hash-table based insertion.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* Receive and apply point-entry stash messages until none remain. */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each block entry occupies bs2 scalars in the stash, hence val+i*bs2 */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash); CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff any process was never assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter context used by MatMult et al. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  /* debug build only: report hash-table search statistics and reset counters */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if the user requested hash-table insertion, create it now and switch
     the set-values function pointers to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow(); sizes may change after reassembly */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
889 
890 #undef __FUNC__
891 #define __FUNC__ "MatView_MPIBAIJ_Binary"
892 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
893 {
894   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
895   int          ierr;
896 
897   PetscFunctionBegin;
898   if (baij->size == 1) {
899     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
900   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNC__
905 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
906 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
907 {
908   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
909   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
910   FILE         *fd;
911   ViewerType   vtype;
912 
913   PetscFunctionBegin;
914   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
915   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
916     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
917     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
918       MatInfo info;
919       MPI_Comm_rank(mat->comm,&rank);
920       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
921       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
922       PetscSequentialPhaseBegin(mat->comm,1);
923       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
924               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
925               baij->bs,(int)info.memory);
926       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
927       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
928       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
929       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
930       fflush(fd);
931       PetscSequentialPhaseEnd(mat->comm,1);
932       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
933       PetscFunctionReturn(0);
934     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
935       PetscPrintf(mat->comm,"  block size is %d\n",bs);
936       PetscFunctionReturn(0);
937     }
938   }
939 
940   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
941     Draw       draw;
942     PetscTruth isnull;
943     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
944     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
945   }
946 
947   if (size == 1) {
948     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
949   } else {
950     /* assemble the entire matrix onto first processor. */
951     Mat         A;
952     Mat_SeqBAIJ *Aloc;
953     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
954     int         mbs = baij->mbs;
955     Scalar      *a;
956 
957     if (!rank) {
958       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
959     } else {
960       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
961     }
962     PLogObjectParent(mat,A);
963 
964     /* copy over the A part */
965     Aloc = (Mat_SeqBAIJ*) baij->A->data;
966     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
968 
969     for ( i=0; i<mbs; i++ ) {
970       rvals[0] = bs*(baij->rstart + i);
971       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
972       for ( j=ai[i]; j<ai[i+1]; j++ ) {
973         col = (baij->cstart+aj[j])*bs;
974         for (k=0; k<bs; k++ ) {
975           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
976           col++; a += bs;
977         }
978       }
979     }
980     /* copy over the B part */
981     Aloc = (Mat_SeqBAIJ*) baij->B->data;
982     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
983     for ( i=0; i<mbs; i++ ) {
984       rvals[0] = bs*(baij->rstart + i);
985       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
986       for ( j=ai[i]; j<ai[i+1]; j++ ) {
987         col = baij->garray[aj[j]]*bs;
988         for (k=0; k<bs; k++ ) {
989           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
990           col++; a += bs;
991         }
992       }
993     }
994     PetscFree(rvals);
995     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
996     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
997     /*
998        Everyone has to call to draw the matrix since the graphics waits are
999        synchronized across all processors that share the Draw object
1000     */
1001     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
1002       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1003     }
1004     ierr = MatDestroy(A); CHKERRQ(ierr);
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 
1010 
1011 #undef __FUNC__
1012 #define __FUNC__ "MatView_MPIBAIJ"
1013 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1014 {
1015   int         ierr;
1016   ViewerType  vtype;
1017 
1018   PetscFunctionBegin;
1019   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1020   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1021       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
1022     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
1023   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1024     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1025   } else {
1026     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1027   }
1028   PetscFunctionReturn(0);
1029 }
1030 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Releases all storage owned by an MPIBAIJ matrix.

   Reference counted: only the last reference actually frees anything.
   Teardown order matters: the stashes and sub-matrices A/B are destroyed
   before the Mat_MPIBAIJ struct itself is freed, and the PETSc header is
   destroyed last.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  /* still referenced elsewhere: just drop the count */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  /* communication stashes used during assembly */
  ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash); CHKERRQ(ierr);

  PetscFree(baij->rowners);
  /* local diagonal and off-diagonal sequential matrices */
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
  if (baij->colmap) TableDelete(baij->colmap);
#else
  if (baij->colmap) PetscFree(baij->colmap);
#endif
  if (baij->garray) PetscFree(baij->garray);
  /* scatter work vector and context for the parallel multiply */
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1079 
1080 #undef __FUNC__
1081 #define __FUNC__ "MatMult_MPIBAIJ"
1082 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1083 {
1084   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1085   int         ierr, nt;
1086 
1087   PetscFunctionBegin;
1088   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1089   if (nt != a->n) {
1090     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1091   }
1092   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1093   if (nt != a->m) {
1094     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1095   }
1096   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1097   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1098   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1099   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1100   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNC__
1105 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1106 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1107 {
1108   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1109   int        ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1113   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1114   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1115   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNC__
1120 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1121 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1122 {
1123   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1124   int         ierr;
1125 
1126   PetscFunctionBegin;
1127   /* do nondiagonal part */
1128   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1129   /* send it on its way */
1130   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1131   /* do local part */
1132   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1133   /* receive remote parts: note this assumes the values are not actually */
1134   /* inserted in yy until the next line, which is true for my implementation*/
1135   /* but is not perhaps always true. */
1136   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNC__
1141 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1142 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1143 {
1144   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1145   int         ierr;
1146 
1147   PetscFunctionBegin;
1148   /* do nondiagonal part */
1149   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1150   /* send it on its way */
1151   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1152   /* do local part */
1153   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1154   /* receive remote parts: note this assumes the values are not actually */
1155   /* inserted in yy until the next line, which is true for my implementation*/
1156   /* but is not perhaps always true. */
1157   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 /*
1162   This only works correctly for square matrices where the subblock A->A is the
1163    diagonal block
1164 */
1165 #undef __FUNC__
1166 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1167 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1168 {
1169   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1170   int         ierr;
1171 
1172   PetscFunctionBegin;
1173   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1174   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNC__
1179 #define __FUNC__ "MatScale_MPIBAIJ"
1180 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1181 {
1182   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1183   int         ierr;
1184 
1185   PetscFunctionBegin;
1186   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1187   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNC__
1192 #define __FUNC__ "MatGetSize_MPIBAIJ"
1193 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1194 {
1195   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1196 
1197   PetscFunctionBegin;
1198   if (m) *m = mat->M;
1199   if (n) *n = mat->N;
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 #undef __FUNC__
1204 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1205 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1206 {
1207   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1208 
1209   PetscFunctionBegin;
1210   *m = mat->m; *n = mat->n;
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNC__
1215 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1216 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1217 {
1218   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1219 
1220   PetscFunctionBegin;
1221   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1222   PetscFunctionReturn(0);
1223 }
1224 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned (point) row of the matrix,
   with global column indices in increasing order.

   The row is split between the diagonal block A (columns local to this
   process) and the off-diagonal block B.  The two pieces are merged into the
   scratch arrays mat->rowvalues / mat->rowindices: first the B entries whose
   global block column precedes cstart, then all A entries, then the
   remaining B entries.  Must be paired with MatRestoreRow_MPIBAIJ().
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* combined block count of row i across both A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices packed after them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request columns and/or values from A and B only if the caller wants them */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries that precede the diagonal block come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all A entries, then the trailing B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: find the split point now */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A columns are local: offset by the global start of the diagonal block */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* release the sub-matrix rows; the merged copies live in mat->rowvalues */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1306 
1307 #undef __FUNC__
1308 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1309 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1310 {
1311   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1312 
1313   PetscFunctionBegin;
1314   if (baij->getrowactive == PETSC_FALSE) {
1315     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1316   }
1317   baij->getrowactive = PETSC_FALSE;
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNC__
1322 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1323 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1324 {
1325   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1326 
1327   PetscFunctionBegin;
1328   *bs = baij->bs;
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNC__
1333 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1334 int MatZeroEntries_MPIBAIJ(Mat A)
1335 {
1336   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1337   int         ierr;
1338 
1339   PetscFunctionBegin;
1340   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1341   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNC__
1346 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1347 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1348 {
1349   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1350   Mat         A = a->A, B = a->B;
1351   int         ierr;
1352   double      isend[5], irecv[5];
1353 
1354   PetscFunctionBegin;
1355   info->block_size     = (double)a->bs;
1356   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1357   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1358   isend[3] = info->memory;  isend[4] = info->mallocs;
1359   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1360   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1361   isend[3] += info->memory;  isend[4] += info->mallocs;
1362   if (flag == MAT_LOCAL) {
1363     info->nz_used      = isend[0];
1364     info->nz_allocated = isend[1];
1365     info->nz_unneeded  = isend[2];
1366     info->memory       = isend[3];
1367     info->mallocs      = isend[4];
1368   } else if (flag == MAT_GLOBAL_MAX) {
1369     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1370     info->nz_used      = irecv[0];
1371     info->nz_allocated = irecv[1];
1372     info->nz_unneeded  = irecv[2];
1373     info->memory       = irecv[3];
1374     info->mallocs      = irecv[4];
1375   } else if (flag == MAT_GLOBAL_SUM) {
1376     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1377     info->nz_used      = irecv[0];
1378     info->nz_allocated = irecv[1];
1379     info->nz_unneeded  = irecv[2];
1380     info->memory       = irecv[3];
1381     info->mallocs      = irecv[4];
1382   } else {
1383     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1384   }
1385   info->rows_global       = (double)a->M;
1386   info->columns_global    = (double)a->N;
1387   info->rows_local        = (double)a->m;
1388   info->columns_local     = (double)a->N;
1389   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1390   info->fill_ratio_needed = 0;
1391   info->factor_mallocs    = 0;
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNC__
1396 #define __FUNC__ "MatSetOption_MPIBAIJ"
1397 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1398 {
1399   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1400   int         ierr;
1401 
1402   PetscFunctionBegin;
1403   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1404       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1405       op == MAT_COLUMNS_UNSORTED ||
1406       op == MAT_COLUMNS_SORTED ||
1407       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1408       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1409         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1410         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1411   } else if (op == MAT_ROW_ORIENTED) {
1412         a->roworiented = 1;
1413         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1414         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1415   } else if (op == MAT_ROWS_SORTED ||
1416              op == MAT_ROWS_UNSORTED ||
1417              op == MAT_SYMMETRIC ||
1418              op == MAT_STRUCTURALLY_SYMMETRIC ||
1419              op == MAT_YES_NEW_DIAGONALS ||
1420              op == MAT_USE_HASH_TABLE) {
1421     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1422   } else if (op == MAT_COLUMN_ORIENTED) {
1423     a->roworiented = 0;
1424     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1425     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1426   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1427     a->donotstash = 1;
1428   } else if (op == MAT_NO_NEW_DIAGONALS) {
1429     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1430   } else if (op == MAT_USE_HASH_TABLE) {
1431     a->ht_flag = 1;
1432   } else {
1433     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1434   }
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNC__
1439 #define __FUNC__ "MatTranspose_MPIBAIJ("
1440 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1441 {
1442   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1443   Mat_SeqBAIJ *Aloc;
1444   Mat        B;
1445   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1446   int        bs=baij->bs,mbs=baij->mbs;
1447   Scalar     *a;
1448 
1449   PetscFunctionBegin;
1450   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1451   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1452   CHKERRQ(ierr);
1453 
1454   /* copy over the A part */
1455   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1456   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1457   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1458 
1459   for ( i=0; i<mbs; i++ ) {
1460     rvals[0] = bs*(baij->rstart + i);
1461     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1462     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1463       col = (baij->cstart+aj[j])*bs;
1464       for (k=0; k<bs; k++ ) {
1465         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1466         col++; a += bs;
1467       }
1468     }
1469   }
1470   /* copy over the B part */
1471   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1472   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1473   for ( i=0; i<mbs; i++ ) {
1474     rvals[0] = bs*(baij->rstart + i);
1475     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1476     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1477       col = baij->garray[aj[j]]*bs;
1478       for (k=0; k<bs; k++ ) {
1479         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1480         col++; a += bs;
1481       }
1482     }
1483   }
1484   PetscFree(rvals);
1485   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1486   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1487 
1488   if (matout != PETSC_NULL) {
1489     *matout = B;
1490   } else {
1491     PetscOps *Abops;
1492     MatOps   Aops;
1493 
1494     /* This isn't really an in-place transpose .... but free data structures from baij */
1495     PetscFree(baij->rowners);
1496     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1497     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1498 #if defined (USE_CTABLE)
1499     if (baij->colmap) TableDelete(baij->colmap);
1500 #else
1501     if (baij->colmap) PetscFree(baij->colmap);
1502 #endif
1503     if (baij->garray) PetscFree(baij->garray);
1504     if (baij->lvec) VecDestroy(baij->lvec);
1505     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1506     PetscFree(baij);
1507 
1508     /*
1509        This is horrible, horrible code. We need to keep the
1510       A pointers for the bops and ops but copy everything
1511       else from C.
1512     */
1513     Abops = A->bops;
1514     Aops  = A->ops;
1515     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1516     A->bops = Abops;
1517     A->ops  = Aops;
1518 
1519     PetscHeaderDestroy(B);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNC__
1525 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1526 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1527 {
1528   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1529   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1530   int ierr,s1,s2,s3;
1531 
1532   PetscFunctionBegin;
1533   if (ll)  {
1534     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1535     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1536     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1537     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1538     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1539   }
1540   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1541   PetscFunctionReturn(0);
1542 }
1543 
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeroes the (point) rows listed in is, optionally
   placing *diag on the diagonal of each zeroed row.

   The row indices in is may refer to rows owned by any process, so a
   hand-rolled all-to-all is performed: each process determines the owner of
   every index it holds, sends each owner its rows, and receives the rows it
   owns from everyone else.  The actual zeroing is then done locally on the
   A and B blocks.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #rows going to process j; procs[j] = 1 if any go to j */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* owners[] holds block-row ranges, so scale by bs to compare point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack rows destined for each process contiguously (starts[] is consumed) */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  /* rebuild starts[] (it was advanced during packing above) */
  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  /* first locally-owned point row; used to localize received indices */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  /* lrows[] collects all locally-owned rows to zero, in local numbering */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: set each diagonal entry individually,
       which requires that new nonzero locations be permitted */
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp); CHKERRQ(ierr);
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
1692 
1693 #undef __FUNC__
1694 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1695 int MatPrintHelp_MPIBAIJ(Mat A)
1696 {
1697   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1698   MPI_Comm    comm = A->comm;
1699   static int  called = 0;
1700   int         ierr;
1701 
1702   PetscFunctionBegin;
1703   if (!a->rank) {
1704     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1705   }
1706   if (called) {PetscFunctionReturn(0);} else called = 1;
1707   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1708   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNC__
1713 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1714 int MatSetUnfactored_MPIBAIJ(Mat A)
1715 {
1716   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1717   int         ierr;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1725 
1726 /* -------------------------------------------------------------------*/
/*
   Dispatch table of matrix operations for the MPIBAIJ format.

   NOTE: the initializers are positional -- each entry must line up with
   the corresponding function-pointer member of struct _MatOps, and a 0
   marks an operation this format does not implement.  Do not reorder,
   insert, or remove entries without updating struct _MatOps to match.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  0,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1794 
1795 
1796 EXTERN_C_BEGIN
1797 #undef __FUNC__
1798 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1799 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1800 {
1801   PetscFunctionBegin;
1802   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1803   *iscopy = PETSC_FALSE;
1804   PetscFunctionReturn(0);
1805 }
1806 EXTERN_C_END
1807 
#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local matrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* on a single processor, unless the user explicitly asked for the
     parallel data structures, create a sequential BAIJ matrix instead */
  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  /* install the MPIBAIJ function table, then override the slots that
     are set directly on the Mat header */
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* validate the size arguments: at least one of each local/global pair
     must be given, and per-row preallocation requires known local rows */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* if both local and global sizes were given, the local sizes win and
     the global sizes are recomputed from them below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* missing global sizes: sum the local sizes across the communicator */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* missing local sizes: split the global block count nearly evenly,
     giving the first (Mbs % size) processes one extra block row/col */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* record the sizes in both the Mat header and the MPIBAIJ struct;
     the *bs fields are in units of blocks, the others in scalar entries */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; rowners/cowners are
     in block units, rowners_bs in scalar units */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  /* turn the per-process counts into a prefix sum of starting offsets */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the two local pieces: A holds the diagonal block (m x n),
     B holds the off-diagonal block, stored with full column width N */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optionally enable hash-table based assembly; a factor <= 1.0 from the
     options database is replaced by the default 1.39 */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the diagonal-block accessor so MatGetDiagonalBlock() works */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2069 
2070 #undef __FUNC__
2071 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2072 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2073 {
2074   Mat         mat;
2075   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2076   int         ierr, len=0, flg;
2077 
2078   PetscFunctionBegin;
2079   *newmat       = 0;
2080   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2081   PLogObjectCreate(mat);
2082   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2083   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2084   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2085   mat->ops->view       = MatView_MPIBAIJ;
2086   mat->factor          = matin->factor;
2087   mat->assembled       = PETSC_TRUE;
2088   mat->insertmode      = NOT_SET_VALUES;
2089 
2090   a->m = mat->m   = oldmat->m;
2091   a->n = mat->n   = oldmat->n;
2092   a->M = mat->M   = oldmat->M;
2093   a->N = mat->N   = oldmat->N;
2094 
2095   a->bs  = oldmat->bs;
2096   a->bs2 = oldmat->bs2;
2097   a->mbs = oldmat->mbs;
2098   a->nbs = oldmat->nbs;
2099   a->Mbs = oldmat->Mbs;
2100   a->Nbs = oldmat->Nbs;
2101 
2102   a->rstart       = oldmat->rstart;
2103   a->rend         = oldmat->rend;
2104   a->cstart       = oldmat->cstart;
2105   a->cend         = oldmat->cend;
2106   a->size         = oldmat->size;
2107   a->rank         = oldmat->rank;
2108   a->donotstash   = oldmat->donotstash;
2109   a->roworiented  = oldmat->roworiented;
2110   a->rowindices   = 0;
2111   a->rowvalues    = 0;
2112   a->getrowactive = PETSC_FALSE;
2113   a->barray       = 0;
2114   a->rstart_bs    = oldmat->rstart_bs;
2115   a->rend_bs      = oldmat->rend_bs;
2116   a->cstart_bs    = oldmat->cstart_bs;
2117   a->cend_bs      = oldmat->cend_bs;
2118 
2119   /* hash table stuff */
2120   a->ht           = 0;
2121   a->hd           = 0;
2122   a->ht_size      = 0;
2123   a->ht_flag      = oldmat->ht_flag;
2124   a->ht_fact      = oldmat->ht_fact;
2125   a->ht_total_ct  = 0;
2126   a->ht_insert_ct = 0;
2127 
2128 
2129   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2130   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2131   a->cowners    = a->rowners + a->size + 2;
2132   a->rowners_bs = a->cowners + a->size + 2;
2133   PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
2134   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr);
2135   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash); CHKERRQ(ierr);
2136   if (oldmat->colmap) {
2137 #if defined (USE_CTABLE)
2138   ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
2139 #else
2140     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2141     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2142     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2143 #endif
2144   } else a->colmap = 0;
2145   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2146     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2147     PLogObjectMemory(mat,len*sizeof(int));
2148     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2149   } else a->garray = 0;
2150 
2151   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2152   PLogObjectParent(mat,a->lvec);
2153   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2154 
2155   PLogObjectParent(mat,a->Mvctx);
2156   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2157   PLogObjectParent(mat,a->A);
2158   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2159   PLogObjectParent(mat,a->B);
2160   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2161   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2162   if (flg) {
2163     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2164   }
2165   *newmat = mat;
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #include "sys.h"
2170 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a matrix stored in PETSc binary format from a
   viewer and builds a parallel MPIBAIJ matrix.

   Process 0 reads the header, the row lengths, the column indices, and
   the numerical values, shipping each other process its contiguous slab
   of rows; every process then counts its diagonal/off-diagonal entries
   per block row so the matrix can be created fully preallocated.

   If the global size is not divisible by the block size (chosen with
   -matload_block_size), extra rows/columns with unit diagonal entries
   are appended to pad the matrix out to a whole number of blocks.

   The 'type' argument is not referenced by this routine.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* process 0 reads and validates the binary file header */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows; rowners[] is in block-row units,
     browners[] in scalar-row units */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    /* each padding row holds a single (diagonal) entry */
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* the padding entries exist only in memory, not on disk */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in everyone else's part (except the last) and ship it off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     rstart/rend and the masks below are all in block units */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        /* convert a scalar column index into a block column index */
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix, fully preallocated via dlens/odlens */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    /* padding rows get a unit diagonal to keep the matrix nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc (note i == size-1 here after the loop above) */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2398 
2399 
2400 
2401 #undef __FUNC__
2402 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2403 /*@
2404    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2405 
2406    Input Parameters:
2407 .  mat  - the matrix
2408 .  fact - factor
2409 
2410    Collective on Mat
2411 
2412    Level: advanced
2413 
2414   Notes:
2415    This can also be set by the command line option: -mat_use_hash_table fact
2416 
2417 .keywords: matrix, hashtable, factor, HT
2418 
2419 .seealso: MatSetOption()
2420 @*/
2421 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2422 {
2423   Mat_MPIBAIJ *baij;
2424 
2425   PetscFunctionBegin;
2426   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2427   if (mat->type != MATMPIBAIJ) {
2428       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2429   }
2430   baij = (Mat_MPIBAIJ*) mat->data;
2431   baij->ht_fact = fact;
2432   PetscFunctionReturn(0);
2433 }
2434