xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d0e4ac2e2bc9e1be9c22e9b3e15c37cdf7c79f7b)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.163 1999/03/16 21:10:39 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 /*
20      Local utility routine that creates a mapping from the global column
21    number to the local number in the off-diagonal part of the local
22    storage of the matrix.  This is done in a non scable way since the
23    length of colmap equals the global matrix length.
24 */
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;   /* off-diagonal local part */
  int         nbs = B->nbs,i,bs=B->bs;             /* nbs = # block columns of B (length of garray) */
#if defined (USE_CTABLE)
  int         ierr;
#endif

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* Sparse variant: hash table maps (global block column + 1) ->
     (local scalar column + 1); the +1 offsets keep 0 free as "not present".
     NOTE(review): nbs/5 appears to be only an initial-size heuristic for the
     table -- confirm against the Table implementation. */
  ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense variant (not scalable: one int per GLOBAL block column):
     colmap[global block col] = local scalar column + 1, 0 = not present */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
50 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) -

   Inlined (macro) insertion of one scalar value into the "diagonal" local
   block matrix A, avoiding a function call per entry.  row,col are LOCAL
   scalar (not block) indices; addv is INSERT_VALUES or ADD_VALUES.
   Relies on locals of the enclosing function: a (Mat_SeqBAIJ*), the aliases
   aa/ai/aj/aimax/ailen, bs, bs2, and the scratch variables
   rp,ap,bap,brow,bcol,ridx,cidx,low,high,t,_i,ii,nrow,rmax,N.
   Searches the row (binary narrowing then linear scan); if the entry exists
   it is updated in place, otherwise a slot is inserted, enlarging the row
   storage by CHUNKSIZE blocks when full (respecting a->nonew).
   NOTE(review): a->nz is incremented only on the reallocation path; an
   insertion into a row with free space does not bump nz here -- presumably
   nz is recomputed from ailen[] during assembly; verify.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
127 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) -

   Identical in structure to MatSetValues_SeqBAIJ_A_Private() but operating on
   the "off-diagonal" local block matrix B via the aliases
   b (Mat_SeqBAIJ*), ba/bi/bj/bimax/bilen, and logging against B.
   row,col are LOCAL scalar indices into B's numbering.
   NOTE(review): as with the A variant, b->nz is bumped only when the row is
   reallocated -- presumably recomputed during assembly; verify.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
202 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts or adds values into the parallel BAIJ matrix
 using GLOBAL scalar (not block) row/column indices.

   Locally owned rows are inserted immediately: columns inside this
 processor's diagonal ownership range go into baij->A, all other columns into
 baij->B (translated through colmap when the matrix was assembled before).
 Rows owned by other processors are placed in the stash and communicated
 during MatAssemblyBegin/End.  Negative row/column indices are ignored.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  /* scratch used inside MatSetValues_SeqBAIJ_{A,B}_Private() */
  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* this processor owns the row */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column lies in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column belongs in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* translate the global scalar column to B's local scalar column;
               colmap stores (local column + 1), so 0 means "not present" */
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B yet: fall back to the unassembled
                 state so the new nonzero location can be created */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-processor row: stash it for communication at assembly time */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
285 
286 #undef __FUNC__
287 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
288 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
289 {
290   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
291   Scalar      *value,*barray=baij->barray;
292   int         ierr,i,j,ii,jj,row,col;
293   int         roworiented = baij->roworiented,rstart=baij->rstart ;
294   int         rend=baij->rend,cstart=baij->cstart,stepval;
295   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
296 
297   if(!barray) {
298     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
299   }
300 
301   if (roworiented) {
302     stepval = (n-1)*bs;
303   } else {
304     stepval = (m-1)*bs;
305   }
306   for ( i=0; i<m; i++ ) {
307     if (im[i] < 0) continue;
308 #if defined(USE_PETSC_BOPT_g)
309     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
310 #endif
311     if (im[i] >= rstart && im[i] < rend) {
312       row = im[i] - rstart;
313       for ( j=0; j<n; j++ ) {
314         /* If NumCol = 1 then a copy is not required */
315         if ((roworiented) && (n == 1)) {
316           barray = v + i*bs2;
317         } else if((!roworiented) && (m == 1)) {
318           barray = v + j*bs2;
319         } else { /* Here a copy is required */
320           if (roworiented) {
321             value = v + i*(stepval+bs)*bs + j*bs;
322           } else {
323             value = v + j*(stepval+bs)*bs + i*bs;
324           }
325           for ( ii=0; ii<bs; ii++,value+=stepval ) {
326             for (jj=0; jj<bs; jj++ ) {
327               *barray++  = *value++;
328             }
329           }
330           barray -=bs2;
331         }
332 
333         if (in[j] >= cstart && in[j] < cend){
334           col  = in[j] - cstart;
335           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
336         }
337         else if (in[j] < 0) continue;
338 #if defined(USE_PETSC_BOPT_g)
339         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
340 #endif
341         else {
342           if (mat->was_assembled) {
343             if (!baij->colmap) {
344               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
345             }
346 
347 #if defined(USE_PETSC_BOPT_g)
348 #if defined (USE_CTABLE)
349             { int data;
350             ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
351             if((data - 1) % bs)
352 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
353             }
354 #else
355             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
356 #endif
357 #endif
358 #if defined (USE_CTABLE)
359 	    ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
360             col  = (col - 1)/bs;
361 #else
362             col = (baij->colmap[in[j]] - 1)/bs;
363 #endif
364             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
365               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
366               col =  in[j];
367             }
368           }
369           else col = in[j];
370           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
371         }
372       }
373     } else {
374       if (!baij->donotstash) {
375         if (roworiented) {
376           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
377         } else {
378           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
379         }
380       }
381     }
382   }
383   PetscFunctionReturn(0);
384 }
/* Multiplicative hashing (Knuth): HASH_KEY is the golden-ratio conjugate.
   HASH(size,key,tmp) maps key -> [0,size); tmp must be a double lvalue used
   as scratch for the fractional-part computation. */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ()
 (scalar indices).

   Requires that the hash table mapping (block row,block col) -> block
 address was already built by MatCreateHashTable_MPIBAIJ_Private(); only
 entries whose nonzero location already exists may be set (otherwise an
 error is raised).  Off-processor rows are stashed as in the regular path.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the BLOCK (row,col) in the hash table; key encoding must
           match MatCreateHashTable_MPIBAIJ_Private() */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probe with wraparound; debug build counts probes */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe with wraparound */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-processor row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
465 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of
 MatSetValuesBlocked_MPIBAIJ(); im[]/in[] are global BLOCK indices and v
 holds m*n blocks of bs2 scalars.  Only blocks already present in the hash
 table can be set; off-processor block rows go to the block stash.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stepval = scalar distance between consecutive rows of one input block */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up the BLOCK (row,col) in the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probe with wraparound; debug build counts probes */
       if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe with wraparound */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* transpose-copy/accumulate: stored block element (r,c) lives at
             c*bs+r (indices jj=ii,ii+bs,... walk row ii of the block) while
             the input rows are stepval scalars apart */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-processor block row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
579 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves values by GLOBAL scalar indices; only
 locally owned rows may be queried (non-local rows raise an error).
 Off-diagonal entries that are not stored locally are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal part A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          /* off-diagonal part: translate through colmap
             (stores local scalar column + 1; 0 means not present) */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* cross-check against garray; a miss means an unstored zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
623 
624 #undef __FUNC__
625 #define __FUNC__ "MatNorm_MPIBAIJ"
626 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
627 {
628   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
629   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
630   int        ierr, i,bs2=baij->bs2;
631   double     sum = 0.0;
632   Scalar     *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       for (i=0; i<amat->nz*bs2; i++ ) {
641 #if defined(USE_PETSC_COMPLEX)
642         sum += PetscReal(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       for (i=0; i<bmat->nz*bs2; i++ ) {
649 #if defined(USE_PETSC_COMPLEX)
650         sum += PetscReal(PetscConj(*v)*(*v)); v++;
651 #else
652         sum += (*v)*(*v); v++;
653 #endif
654       }
655       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
656       *norm = sqrt(*norm);
657     } else {
658       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
659     }
660   }
661   PetscFunctionReturn(0);
662 }
663 
664 
665 /*
666   Creates the hash table, and sets the table
667   This table is created only once.
668   If new entried need to be added to the matrix
669   then the hash table has to be destroyed and
670   recreated.
671 */
672 #undef __FUNC__
673 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
674 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
675 {
676   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
677   Mat         A = baij->A, B=baij->B;
678   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
679   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
680   int         size,bs2=baij->bs2,rstart=baij->rstart;
681   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
682   int         *HT,key;
683   Scalar      **HD;
684   double      tmp;
685 #if defined(USE_PETSC_BOPT_g)
686   int         ct=0,max=0;
687 #endif
688 
689   PetscFunctionBegin;
690   baij->ht_size=(int)(factor*nz);
691   size = baij->ht_size;
692 
693   if (baij->ht) {
694     PetscFunctionReturn(0);
695   }
696 
697   /* Allocate Memory for Hash Table */
698   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
699   baij->ht = (int*)(baij->hd + size);
700   HD = baij->hd;
701   HT = baij->ht;
702 
703 
704   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
705 
706 
707   /* Loop Over A */
708   for ( i=0; i<a->mbs; i++ ) {
709     for ( j=ai[i]; j<ai[i+1]; j++ ) {
710       row = i+rstart;
711       col = aj[j]+cstart;
712 
713       key = row*Nbs + col + 1;
714       h1  = HASH(size,key,tmp);
715       for ( k=0; k<size; k++ ){
716         if (HT[(h1+k)%size] == 0.0) {
717           HT[(h1+k)%size] = key;
718           HD[(h1+k)%size] = a->a + j*bs2;
719           break;
720 #if defined(USE_PETSC_BOPT_g)
721         } else {
722           ct++;
723 #endif
724         }
725       }
726 #if defined(USE_PETSC_BOPT_g)
727       if (k> max) max = k;
728 #endif
729     }
730   }
731   /* Loop Over B */
732   for ( i=0; i<b->mbs; i++ ) {
733     for ( j=bi[i]; j<bi[i+1]; j++ ) {
734       row = i+rstart;
735       col = garray[bj[j]];
736       key = row*Nbs + col + 1;
737       h1  = HASH(size,key,tmp);
738       for ( k=0; k<size; k++ ){
739         if (HT[(h1+k)%size] == 0.0) {
740           HT[(h1+k)%size] = key;
741           HD[(h1+k)%size] = b->a + j*bs2;
742           break;
743 #if defined(USE_PETSC_BOPT_g)
744         } else {
745           ct++;
746 #endif
747         }
748       }
749 #if defined(USE_PETSC_BOPT_g)
750       if (k> max) max = k;
751 #endif
752     }
753   }
754 
755   /* Print Summary */
756 #if defined(USE_PETSC_BOPT_g)
757   for ( i=0,j=0; i<size; i++)
758     if (HT[i]) {j++;}
759   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
760            (j== 0)? 0.0:((double)(ct+j))/j,max);
761 #endif
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNC__
766 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
767 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
768 {
769   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
770   int         ierr,nstash,reallocs;
771   InsertMode  addv;
772 
773   PetscFunctionBegin;
774   if (baij->donotstash) {
775     PetscFunctionReturn(0);
776   }
777 
778   /* make sure all processors are either in INSERTMODE or ADDMODE */
779   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
780   if (addv == (ADD_VALUES|INSERT_VALUES)) {
781     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
782   }
783   mat->insertmode = addv; /* in case this processor had no cache */
784 
785   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs); CHKERRQ(ierr);
786   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners); CHKERRQ(ierr);
787   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
788   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",
789            nstash,reallocs);
790   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs); CHKERRQ(ierr);
791   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",
792            nstash,reallocs);
793   PetscFunctionReturn(0);
794 }
795 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly of a parallel BAIJ matrix.

   Drains the point (scalar-indexed) stash and then the block stash of
   values that were set for rows owned by this processor, assembles the
   local diagonal (A) and off-diagonal (B) submatrices, and on the first
   MAT_FINAL_ASSEMBLY builds the scatter context used by MatMult().
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* receive and process all messages destined for the point stash */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash); CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash); CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the scatter for off-processor vector entries */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  /* report and reset the hash-table search statistics gathered in MatSetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if hash-table based MatSetValues was requested and the table does not yet
     exist, create it and switch the function pointers to the _HT variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow() workspace may be stale after reassembly; free it lazily */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
891 
892 #undef __FUNC__
893 #define __FUNC__ "MatView_MPIBAIJ_Binary"
894 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
895 {
896   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
897   int          ierr;
898 
899   PetscFunctionBegin;
900   if (baij->size == 1) {
901     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
902   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
903   PetscFunctionReturn(0);
904 }
905 
906 #undef __FUNC__
907 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
908 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
909 {
910   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
911   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
912   FILE         *fd;
913   ViewerType   vtype;
914 
915   PetscFunctionBegin;
916   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
917   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
918     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
919     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
920       MatInfo info;
921       MPI_Comm_rank(mat->comm,&rank);
922       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
923       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
924       PetscSequentialPhaseBegin(mat->comm,1);
925       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
926               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
927               baij->bs,(int)info.memory);
928       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
929       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
930       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
931       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
932       fflush(fd);
933       PetscSequentialPhaseEnd(mat->comm,1);
934       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
935       PetscFunctionReturn(0);
936     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
937       PetscPrintf(mat->comm,"  block size is %d\n",bs);
938       PetscFunctionReturn(0);
939     }
940   }
941 
942   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
943     Draw       draw;
944     PetscTruth isnull;
945     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
946     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
947   }
948 
949   if (size == 1) {
950     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
951   } else {
952     /* assemble the entire matrix onto first processor. */
953     Mat         A;
954     Mat_SeqBAIJ *Aloc;
955     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
956     int         mbs = baij->mbs;
957     Scalar      *a;
958 
959     if (!rank) {
960       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
961     } else {
962       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
963     }
964     PLogObjectParent(mat,A);
965 
966     /* copy over the A part */
967     Aloc = (Mat_SeqBAIJ*) baij->A->data;
968     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
969     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
970 
971     for ( i=0; i<mbs; i++ ) {
972       rvals[0] = bs*(baij->rstart + i);
973       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
974       for ( j=ai[i]; j<ai[i+1]; j++ ) {
975         col = (baij->cstart+aj[j])*bs;
976         for (k=0; k<bs; k++ ) {
977           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
978           col++; a += bs;
979         }
980       }
981     }
982     /* copy over the B part */
983     Aloc = (Mat_SeqBAIJ*) baij->B->data;
984     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
985     for ( i=0; i<mbs; i++ ) {
986       rvals[0] = bs*(baij->rstart + i);
987       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
988       for ( j=ai[i]; j<ai[i+1]; j++ ) {
989         col = baij->garray[aj[j]]*bs;
990         for (k=0; k<bs; k++ ) {
991           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
992           col++; a += bs;
993         }
994       }
995     }
996     PetscFree(rvals);
997     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
998     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
999     /*
1000        Everyone has to call to draw the matrix since the graphics waits are
1001        synchronized across all processors that share the Draw object
1002     */
1003     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
1004       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1005     }
1006     ierr = MatDestroy(A); CHKERRQ(ierr);
1007   }
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 
1012 
1013 #undef __FUNC__
1014 #define __FUNC__ "MatView_MPIBAIJ"
1015 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1016 {
1017   int         ierr;
1018   ViewerType  vtype;
1019 
1020   PetscFunctionBegin;
1021   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1022   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1023       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
1024     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
1025   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1026     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1027   } else {
1028     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1029   }
1030   PetscFunctionReturn(0);
1031 }
1032 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Releases all storage held by a parallel BAIJ matrix.
   The object is actually destroyed only when its reference count reaches
   zero; otherwise just the count is decremented.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  /* still referenced elsewhere: only drop this reference */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  /* both the point stash and the block stash */
  ierr = MatStashDestroy_Private(&mat->stash); CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash); CHKERRQ(ierr);

  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
#if defined (USE_CTABLE)
  if (baij->colmap) TableDelete(baij->colmap);
#else
  if (baij->colmap) PetscFree(baij->colmap);
#endif
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1081 
1082 #undef __FUNC__
1083 #define __FUNC__ "MatMult_MPIBAIJ"
1084 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1085 {
1086   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1087   int         ierr, nt;
1088 
1089   PetscFunctionBegin;
1090   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1091   if (nt != a->n) {
1092     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1093   }
1094   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1095   if (nt != a->m) {
1096     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1097   }
1098   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1099   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1100   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1101   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1102   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNC__
1107 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1108 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1111   int        ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1115   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1116   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1117   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNC__
1122 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1123 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1124 {
1125   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1126   int         ierr;
1127 
1128   PetscFunctionBegin;
1129   /* do nondiagonal part */
1130   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1131   /* send it on its way */
1132   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1133   /* do local part */
1134   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1135   /* receive remote parts: note this assumes the values are not actually */
1136   /* inserted in yy until the next line, which is true for my implementation*/
1137   /* but is not perhaps always true. */
1138   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNC__
1143 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1144 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1145 {
1146   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1147   int         ierr;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1152   /* send it on its way */
1153   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1154   /* do local part */
1155   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1156   /* receive remote parts: note this assumes the values are not actually */
1157   /* inserted in yy until the next line, which is true for my implementation*/
1158   /* but is not perhaps always true. */
1159   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 /*
1164   This only works correctly for square matrices where the subblock A->A is the
1165    diagonal block
1166 */
1167 #undef __FUNC__
1168 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1169 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1170 {
1171   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1172   int         ierr;
1173 
1174   PetscFunctionBegin;
1175   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1176   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNC__
1181 #define __FUNC__ "MatScale_MPIBAIJ"
1182 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1183 {
1184   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1185   int         ierr;
1186 
1187   PetscFunctionBegin;
1188   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1189   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNC__
1194 #define __FUNC__ "MatGetSize_MPIBAIJ"
1195 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1196 {
1197   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1198 
1199   PetscFunctionBegin;
1200   if (m) *m = mat->M;
1201   if (n) *n = mat->N;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNC__
1206 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1207 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1208 {
1209   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1210 
1211   PetscFunctionBegin;
1212   *m = mat->m; *n = mat->n;
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 #undef __FUNC__
1217 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1218 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1219 {
1220   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1221 
1222   PetscFunctionBegin;
1223   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1224   PetscFunctionReturn(0);
1225 }
1226 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally owned (global, point-indexed) row,
   merging the diagonal (A) and off-diagonal (B) parts so the caller sees
   column indices in increasing global order.  Values/indices are copied
   into a per-matrix workspace and remain valid until MatRestoreRow().
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be outstanding at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array after them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* pass null pointers for whatever the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap translates local block columns of B to global block columns */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes cstart come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so imark is computed here instead */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* the sequential rows have been copied out; release them immediately */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1308 
1309 #undef __FUNC__
1310 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1311 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1312 {
1313   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1314 
1315   PetscFunctionBegin;
1316   if (baij->getrowactive == PETSC_FALSE) {
1317     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1318   }
1319   baij->getrowactive = PETSC_FALSE;
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNC__
1324 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1325 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1326 {
1327   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1328 
1329   PetscFunctionBegin;
1330   *bs = baij->bs;
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNC__
1335 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1336 int MatZeroEntries_MPIBAIJ(Mat A)
1337 {
1338   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1339   int         ierr;
1340 
1341   PetscFunctionBegin;
1342   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1343   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNC__
1348 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1349 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1350 {
1351   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1352   Mat         A = a->A, B = a->B;
1353   int         ierr;
1354   double      isend[5], irecv[5];
1355 
1356   PetscFunctionBegin;
1357   info->block_size     = (double)a->bs;
1358   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1359   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1360   isend[3] = info->memory;  isend[4] = info->mallocs;
1361   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1362   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1363   isend[3] += info->memory;  isend[4] += info->mallocs;
1364   if (flag == MAT_LOCAL) {
1365     info->nz_used      = isend[0];
1366     info->nz_allocated = isend[1];
1367     info->nz_unneeded  = isend[2];
1368     info->memory       = isend[3];
1369     info->mallocs      = isend[4];
1370   } else if (flag == MAT_GLOBAL_MAX) {
1371     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1372     info->nz_used      = irecv[0];
1373     info->nz_allocated = irecv[1];
1374     info->nz_unneeded  = irecv[2];
1375     info->memory       = irecv[3];
1376     info->mallocs      = irecv[4];
1377   } else if (flag == MAT_GLOBAL_SUM) {
1378     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1379     info->nz_used      = irecv[0];
1380     info->nz_allocated = irecv[1];
1381     info->nz_unneeded  = irecv[2];
1382     info->memory       = irecv[3];
1383     info->mallocs      = irecv[4];
1384   } else {
1385     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1386   }
1387   info->rows_global       = (double)a->M;
1388   info->columns_global    = (double)a->N;
1389   info->rows_local        = (double)a->m;
1390   info->columns_local     = (double)a->N;
1391   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1392   info->fill_ratio_needed = 0;
1393   info->factor_mallocs    = 0;
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 #undef __FUNC__
1398 #define __FUNC__ "MatSetOption_MPIBAIJ"
1399 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1400 {
1401   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1402   int         ierr;
1403 
1404   PetscFunctionBegin;
1405   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1406       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1407       op == MAT_COLUMNS_UNSORTED ||
1408       op == MAT_COLUMNS_SORTED ||
1409       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1410       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1411         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1412         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1413   } else if (op == MAT_ROW_ORIENTED) {
1414         a->roworiented = 1;
1415         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1416         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1417   } else if (op == MAT_ROWS_SORTED ||
1418              op == MAT_ROWS_UNSORTED ||
1419              op == MAT_SYMMETRIC ||
1420              op == MAT_STRUCTURALLY_SYMMETRIC ||
1421              op == MAT_YES_NEW_DIAGONALS ||
1422              op == MAT_USE_HASH_TABLE) {
1423     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1424   } else if (op == MAT_COLUMN_ORIENTED) {
1425     a->roworiented = 0;
1426     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1427     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1428   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1429     a->donotstash = 1;
1430   } else if (op == MAT_NO_NEW_DIAGONALS) {
1431     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1432   } else if (op == MAT_USE_HASH_TABLE) {
1433     a->ht_flag = 1;
1434   } else {
1435     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNC__
1441 #define __FUNC__ "MatTranspose_MPIBAIJ("
1442 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1443 {
1444   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1445   Mat_SeqBAIJ *Aloc;
1446   Mat        B;
1447   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1448   int        bs=baij->bs,mbs=baij->mbs;
1449   Scalar     *a;
1450 
1451   PetscFunctionBegin;
1452   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1453   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1454   CHKERRQ(ierr);
1455 
1456   /* copy over the A part */
1457   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1458   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1459   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1460 
1461   for ( i=0; i<mbs; i++ ) {
1462     rvals[0] = bs*(baij->rstart + i);
1463     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1464     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1465       col = (baij->cstart+aj[j])*bs;
1466       for (k=0; k<bs; k++ ) {
1467         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1468         col++; a += bs;
1469       }
1470     }
1471   }
1472   /* copy over the B part */
1473   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1474   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1475   for ( i=0; i<mbs; i++ ) {
1476     rvals[0] = bs*(baij->rstart + i);
1477     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1478     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1479       col = baij->garray[aj[j]]*bs;
1480       for (k=0; k<bs; k++ ) {
1481         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1482         col++; a += bs;
1483       }
1484     }
1485   }
1486   PetscFree(rvals);
1487   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1488   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1489 
1490   if (matout != PETSC_NULL) {
1491     *matout = B;
1492   } else {
1493     PetscOps *Abops;
1494     MatOps   Aops;
1495 
1496     /* This isn't really an in-place transpose .... but free data structures from baij */
1497     PetscFree(baij->rowners);
1498     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1499     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1500 #if defined (USE_CTABLE)
1501     if (baij->colmap) TableDelete(baij->colmap);
1502 #else
1503     if (baij->colmap) PetscFree(baij->colmap);
1504 #endif
1505     if (baij->garray) PetscFree(baij->garray);
1506     if (baij->lvec) VecDestroy(baij->lvec);
1507     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1508     PetscFree(baij);
1509 
1510     /*
1511        This is horrible, horrible code. We need to keep the
1512       A pointers for the bops and ops but copy everything
1513       else from C.
1514     */
1515     Abops = A->bops;
1516     Aops  = A->ops;
1517     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1518     A->bops = Abops;
1519     A->ops  = Aops;
1520 
1521     PetscHeaderDestroy(B);
1522   }
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNC__
1527 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1528 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1529 {
1530   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1531   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1532   int ierr,s1,s2,s3;
1533 
1534   PetscFunctionBegin;
1535   if (ll)  {
1536     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1537     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1538     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1539     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1540     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1541   }
1542   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNC__
1547 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1548 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1549 {
1550   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1551   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1552   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1553   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1554   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1555   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1556   MPI_Comm       comm = A->comm;
1557   MPI_Request    *send_waits,*recv_waits;
1558   MPI_Status     recv_status,*send_status;
1559   IS             istmp;
1560 
1561   PetscFunctionBegin;
1562   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1563   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1564 
1565   /*  first count number of contributors to each processor */
1566   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1567   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1568   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1569   for ( i=0; i<N; i++ ) {
1570     idx = rows[i];
1571     found = 0;
1572     for ( j=0; j<size; j++ ) {
1573       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1574         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1575       }
1576     }
1577     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1578   }
1579   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1580 
1581   /* inform other processors of number of messages and max length*/
1582   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1583   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1584   nrecvs = work[rank];
1585   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1586   nmax   = work[rank];
1587   PetscFree(work);
1588 
1589   /* post receives:   */
1590   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1591   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1592   for ( i=0; i<nrecvs; i++ ) {
1593     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1594   }
1595 
1596   /* do sends:
1597      1) starts[i] gives the starting index in svalues for stuff going to
1598      the ith processor
1599   */
1600   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1601   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1602   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1603   starts[0] = 0;
1604   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1605   for ( i=0; i<N; i++ ) {
1606     svalues[starts[owner[i]]++] = rows[i];
1607   }
1608   ISRestoreIndices(is,&rows);
1609 
1610   starts[0] = 0;
1611   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1612   count = 0;
1613   for ( i=0; i<size; i++ ) {
1614     if (procs[i]) {
1615       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1616     }
1617   }
1618   PetscFree(starts);
1619 
1620   base = owners[rank]*bs;
1621 
1622   /*  wait on receives */
1623   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1624   source = lens + nrecvs;
1625   count  = nrecvs; slen = 0;
1626   while (count) {
1627     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1628     /* unpack receives into our local space */
1629     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1630     source[imdex]  = recv_status.MPI_SOURCE;
1631     lens[imdex]  = n;
1632     slen += n;
1633     count--;
1634   }
1635   PetscFree(recv_waits);
1636 
1637   /* move the data into the send scatter */
1638   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1639   count = 0;
1640   for ( i=0; i<nrecvs; i++ ) {
1641     values = rvalues + i*nmax;
1642     for ( j=0; j<lens[i]; j++ ) {
1643       lrows[count++] = values[j] - base;
1644     }
1645   }
1646   PetscFree(rvalues); PetscFree(lens);
1647   PetscFree(owner); PetscFree(nprocs);
1648 
1649   /* actually zap the local rows */
1650   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1651   PLogObjectParent(A,istmp);
1652 
1653   /*
1654         Zero the required rows. If the "diagonal block" of the matrix
1655      is square and the user wishes to set the diagonal we use seperate
1656      code so that MatSetValues() is not called for each diagonal allocating
1657      new memory, thus calling lots of mallocs and slowing things down.
1658 
1659        Contributed by: Mathew Knepley
1660   */
1661   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1662   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1663   if (diag && (l->A->M == l->A->N)) {
1664     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1665   } else if (diag) {
1666     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1667     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1668       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1669 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1670     }
1671     for ( i = 0; i < slen; i++ ) {
1672       row  = lrows[i] + rstart_bs;
1673       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1674     }
1675     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1676     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1677   } else {
1678     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1679   }
1680 
1681   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1682   PetscFree(lrows);
1683 
1684   /* wait on sends */
1685   if (nsends) {
1686     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1687     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1688     PetscFree(send_status);
1689   }
1690   PetscFree(send_waits); PetscFree(svalues);
1691 
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNC__
1696 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1697 int MatPrintHelp_MPIBAIJ(Mat A)
1698 {
1699   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1700   MPI_Comm    comm = A->comm;
1701   static int  called = 0;
1702   int         ierr;
1703 
1704   PetscFunctionBegin;
1705   if (!a->rank) {
1706     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1707   }
1708   if (called) {PetscFunctionReturn(0);} else called = 1;
1709   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1710   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 #undef __FUNC__
1715 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1716 int MatSetUnfactored_MPIBAIJ(Mat A)
1717 {
1718   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1719   int         ierr;
1720 
1721   PetscFunctionBegin;
1722   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1727 
1728 /* -------------------------------------------------------------------*/
/*
   Function dispatch table for the MATMPIBAIJ matrix type.

   The initializers are POSITIONAL: each slot corresponds, in order, to a
   member of struct _MatOps (declared elsewhere), so entries here must not
   be reordered.  A 0 entry means the operation is not implemented for
   this format.  This table is copied wholesale into B->ops by
   MatCreateMPIBAIJ() and MatDuplicate_MPIBAIJ(), which then override the
   destroy and view slots.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  0,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1796 
1797 
1798 EXTERN_C_BEGIN
1799 #undef __FUNC__
1800 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1801 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1802 {
1803   PetscFunctionBegin;
1804   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1805   *iscopy = PETSC_FALSE;
1806   PetscFunctionReturn(0);
1807 }
1808 EXTERN_C_END
1809 
1810 #undef __FUNC__
1811 #define __FUNC__ "MatCreateMPIBAIJ"
1812 /*@C
1813    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1814    (block compressed row).  For good matrix assembly performance
1815    the user should preallocate the matrix storage by setting the parameters
1816    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1817    performance can be increased by more than a factor of 50.
1818 
1819    Collective on MPI_Comm
1820 
1821    Input Parameters:
1822 +  comm - MPI communicator
.  bs   - size of block
1824 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1825            This value should be the same as the local size used in creating the
1826            y vector for the matrix-vector product y = Ax.
1827 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1828            This value should be the same as the local size used in creating the
1829            x vector for the matrix-vector product y = Ax.
1830 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1831 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1832 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1833            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1837 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1838            submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.
1842 
1843    Output Parameter:
1844 .  A - the matrix
1845 
1846    Options Database Keys:
1847 .   -mat_no_unroll - uses code that does not unroll the loops in the
1848                      block calculations (much slower)
1849 .   -mat_block_size - size of the blocks to use
1850 .   -mat_mpi - use the parallel matrix data structures even on one processor
1851                (defaults to using SeqBAIJ format on one processor)
1852 
1853    Notes:
1854    The user MUST specify either the local or global matrix dimensions
1855    (possibly both).
1856 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1859 
1860    Storage Information:
1861    For a square global matrix we define each processor's diagonal portion
1862    to be its local rows and the corresponding columns (a square submatrix);
1863    each processor's off-diagonal portion encompasses the remainder of the
1864    local matrix (a rectangular submatrix).
1865 
1866    The user can specify preallocated storage for the diagonal part of
1867    the local submatrix with either d_nz or d_nnz (not both).  Set
1868    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1869    memory allocation.  Likewise, specify preallocated storage for the
1870    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1871 
1872    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1873    the figure below we depict these three local rows and all columns (0-11).
1874 
1875 .vb
1876            0 1 2 3 4 5 6 7 8 9 10 11
1877           -------------------
1878    row 3  |  o o o d d d o o o o o o
1879    row 4  |  o o o d d d o o o o o o
1880    row 5  |  o o o d d d o o o o o o
1881           -------------------
1882 .ve
1883 
1884    Thus, any entries in the d locations are stored in the d (diagonal)
1885    submatrix, and any entries in the o locations are stored in the
1886    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1887    stored simply in the MATSEQBAIJ format for compressed row storage.
1888 
1889    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1890    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1891    In general, for PDE problems in which most nonzeros are near the diagonal,
1892    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1893    or you will get TERRIBLE performance; see the users' manual chapter on
1894    matrices.
1895 
1896    Level: intermediate
1897 
1898 .keywords: matrix, block, aij, compressed row, sparse, parallel
1899 
1900 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1901 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* On one process, unless the parallel format is forced with -mat_mpibaij
     or -mat_mpi, create a sequential BAIJ matrix instead */
  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    /* only the "diagonal" preallocation is meaningful sequentially */
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  /* install the shared dispatch table, then override per-type entries */
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* validate the combination of local/global sizes supplied by the caller */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes win: force
     recomputation of the global sizes from the local ones below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* collective: sum the local sizes to obtain any missing global size.
     (If m or n is PETSC_DECIDE here, the corresponding sum is unused,
     since the checks above guarantee the matching global size was given.) */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* derive missing local sizes by spreading whole blocks across processes;
     the first (Mbs % size) ranks get one extra block row */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* cache the sizes both in points (m,n,M,N) and in blocks (mbs,...) */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; one allocation holds
     rowners (block rows), cowners (block cols) and rowners_bs (point rows) */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* gather each rank's block-row count, then prefix-sum into offsets */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  /* same for the column ownership */
  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the local m x n "diagonal" block A and the m x N "off-diagonal"
     block B with the requested preallocation */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash); CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional: hash-table based assembly, enabled from the options database */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;   /* reject degenerate factors; fall back to default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2071 
2072 #undef __FUNC__
2073 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2074 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2075 {
2076   Mat         mat;
2077   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2078   int         ierr, len=0, flg;
2079 
2080   PetscFunctionBegin;
2081   *newmat       = 0;
2082   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2083   PLogObjectCreate(mat);
2084   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2085   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2086   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2087   mat->ops->view       = MatView_MPIBAIJ;
2088   mat->factor          = matin->factor;
2089   mat->assembled       = PETSC_TRUE;
2090   mat->insertmode      = NOT_SET_VALUES;
2091 
2092   a->m = mat->m   = oldmat->m;
2093   a->n = mat->n   = oldmat->n;
2094   a->M = mat->M   = oldmat->M;
2095   a->N = mat->N   = oldmat->N;
2096 
2097   a->bs  = oldmat->bs;
2098   a->bs2 = oldmat->bs2;
2099   a->mbs = oldmat->mbs;
2100   a->nbs = oldmat->nbs;
2101   a->Mbs = oldmat->Mbs;
2102   a->Nbs = oldmat->Nbs;
2103 
2104   a->rstart       = oldmat->rstart;
2105   a->rend         = oldmat->rend;
2106   a->cstart       = oldmat->cstart;
2107   a->cend         = oldmat->cend;
2108   a->size         = oldmat->size;
2109   a->rank         = oldmat->rank;
2110   a->donotstash   = oldmat->donotstash;
2111   a->roworiented  = oldmat->roworiented;
2112   a->rowindices   = 0;
2113   a->rowvalues    = 0;
2114   a->getrowactive = PETSC_FALSE;
2115   a->barray       = 0;
2116   a->rstart_bs    = oldmat->rstart_bs;
2117   a->rend_bs      = oldmat->rend_bs;
2118   a->cstart_bs    = oldmat->cstart_bs;
2119   a->cend_bs      = oldmat->cend_bs;
2120 
2121   /* hash table stuff */
2122   a->ht           = 0;
2123   a->hd           = 0;
2124   a->ht_size      = 0;
2125   a->ht_flag      = oldmat->ht_flag;
2126   a->ht_fact      = oldmat->ht_fact;
2127   a->ht_total_ct  = 0;
2128   a->ht_insert_ct = 0;
2129 
2130 
2131   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2132   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2133   a->cowners    = a->rowners + a->size + 2;
2134   a->rowners_bs = a->cowners + a->size + 2;
2135   PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
2136   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash); CHKERRQ(ierr);
2137   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash); CHKERRQ(ierr);
2138   if (oldmat->colmap) {
2139 #if defined (USE_CTABLE)
2140   ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
2141 #else
2142     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2143     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2144     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2145 #endif
2146   } else a->colmap = 0;
2147   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2148     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2149     PLogObjectMemory(mat,len*sizeof(int));
2150     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2151   } else a->garray = 0;
2152 
2153   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2154   PLogObjectParent(mat,a->lvec);
2155   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2156 
2157   PLogObjectParent(mat,a->Mvctx);
2158   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
2159   PLogObjectParent(mat,a->A);
2160   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
2161   PLogObjectParent(mat,a->B);
2162   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2163   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2164   if (flg) {
2165     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2166   }
2167   *newmat = mat;
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #include "sys.h"
2172 
2173 #undef __FUNC__
2174 #define __FUNC__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a matrix stored in PETSc binary format into a
   parallel MATMPIBAIJ matrix.

   Process 0 reads the header, row lengths, column indices and values from
   the binary file and ships each process its portion with point-to-point
   messages; every process then assembles its local rows with
   MatSetValues().  If the global size is not divisible by the block size
   (-matload_block_size), extra rows/columns padded with a unit diagonal
   are appended so the padded matrix is block-conforming.

   Collective on the viewer's communicator; only square matrices are
   supported.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  /* block size defaults to 1 unless overridden on the command line */
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* only process 0 touches the file; it validates the header first */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  /* broadcast global sizes (header[1..3]) to every process */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows: block rows are spread so the first
     (Mbs % size) ranks hold one extra block row */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  /* rowners holds block-row offsets, browners the same offsets in points */
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    /* each padding row holds exactly one (diagonal) entry */
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* with one process the padding entries are not on disk; synthesize them */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      /* append the column indices of the padding diagonal entries */
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  /* mask marks block columns already seen in the current block row;
     masked1/masked2 remember which entries to clear afterwards */
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;   /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* rstart/rend are block-row bounds; valid as a diagonal-block
             test because the matrix is square */
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix, preallocated exactly from the counted lengths */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    /* padding rows get a unit diagonal */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    /* NOTE(review): procsnz[i] below relies on i == size-1 surviving the
       loop above, and i is then reused for the padding loop */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2400 
2401 
2402 
2403 #undef __FUNC__
2404 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2405 /*@
2406    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2407 
2408    Input Parameters:
2409 .  mat  - the matrix
2410 .  fact - factor
2411 
2412    Collective on Mat
2413 
2414    Level: advanced
2415 
2416   Notes:
2417    This can also be set by the command line option: -mat_use_hash_table fact
2418 
2419 .keywords: matrix, hashtable, factor, HT
2420 
2421 .seealso: MatSetOption()
2422 @*/
2423 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2424 {
2425   Mat_MPIBAIJ *baij;
2426 
2427   PetscFunctionBegin;
2428   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2429   if (mat->type != MATMPIBAIJ) {
2430       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2431   }
2432   baij = (Mat_MPIBAIJ*) mat->data;
2433   baij->ht_fact = fact;
2434   PetscFunctionReturn(0);
2435 }
2436