xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e005ede52eafe2fed2813abb7a7eb3df04d5f886)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar()
   does not exist; the ..._MatScalar() names are simply #defined to the regular
   routines below.  Otherwise ..._MatScalar() takes matrix elements that are
   already in single precision and inserts them directly into the single
   precision data structures, while MatSetValuesBlocked_MPIBAIJ() first
   converts the entries to single precision and then calls ..._MatScalar()
   to insert them.
*/
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   int i;
44   PetscScalar  *va,*vb;
45   Vec          vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int         nbs = B->nbs,i,bs=B->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash-table version: stores only the nbs block columns actually present;
     keys and values are shifted by +1 because 0 means "absent" in the table */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense version: one int per global block column (non-scalable memory);
     entry 0 means the block column is absent, otherwise local index*bs + 1 */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
126 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private - inlined insertion of a single entry
   (row,col) with scalar `value` into the diagonal block a = baij->A.
   Relies on many variables (aj,ai,aa,aimax,ailen,bs,bs2,ierr,...) being
   declared in the enclosing function.  Binary-searches the sorted block row,
   grows the storage by CHUNKSIZE block entries when the row is full, and
   shifts later entries to keep the row sorted.
   Fix: the memzero of the freshly inserted chunk now uses sizeof(MatScalar);
   sizeof(PetscScalar) over-zeroed the buffer (clobbering the adjacent column
   index array) when PETSC_USE_MAT_SINGLE makes MatScalar smaller than
   PetscScalar.  This also matches MatSetValues_SeqBAIJ_B_Private below.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
205 
/*
   MatSetValues_SeqBAIJ_B_Private - inlined insertion of a single entry
   (row,col) with scalar `value` into the off-diagonal block b = baij->B.
   Mirrors MatSetValues_SeqBAIJ_A_Private above and relies on many variables
   (bj,bi,ba,bimax,bilen,bs,bs2,ierr,...) declared in the enclosing function.
   Binary-searches the sorted block row, grows the storage by CHUNKSIZE block
   entries when the row is full, and shifts later entries to keep the row
   sorted.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288   PetscErrorCode ierr;
289   int  i,N = m*n;
290   MatScalar   *vsingle;
291 
292   PetscFunctionBegin;
293   if (N > b->setvalueslen) {
294     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
295     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
296     b->setvalueslen  = N;
297   }
298   vsingle = b->setvaluescopy;
299 
300   for (i=0; i<N; i++) {
301     vsingle[i] = v[i];
302   }
303   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
310 {
311   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
312   PetscErrorCode ierr;
313   int i,N = m*n*b->bs2;
314   MatScalar   *vsingle;
315 
316   PetscFunctionBegin;
317   if (N > b->setvalueslen) {
318     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
319     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
320     b->setvalueslen  = N;
321   }
322   vsingle = b->setvaluescopy;
323   for (i=0; i<N; i++) {
324     vsingle[i] = v[i];
325   }
326   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
333 {
334   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
335   PetscErrorCode ierr;
336   int i,N = m*n;
337   MatScalar   *vsingle;
338 
339   PetscFunctionBegin;
340   if (N > b->setvalueslen) {
341     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
342     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
343     b->setvalueslen  = N;
344   }
345   vsingle = b->setvaluescopy;
346   for (i=0; i<N; i++) {
347     vsingle[i] = v[i];
348   }
349   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
356 {
357   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
358   PetscErrorCode ierr;
359   int i,N = m*n*b->bs2;
360   MatScalar   *vsingle;
361 
362   PetscFunctionBegin;
363   if (N > b->setvalueslen) {
364     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
365     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
366     b->setvalueslen  = N;
367   }
368   vsingle = b->setvaluescopy;
369   for (i=0; i<N; i++) {
370     vsingle[i] = v[i];
371   }
372   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 #endif
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
380 {
381   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
382   MatScalar   value;
383   PetscTruth  roworiented = baij->roworiented;
384   PetscErrorCode ierr;
385   int i,j,row,col;
386   int         rstart_orig=baij->rstart_bs;
387   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
388   int         cend_orig=baij->cend_bs,bs=baij->bs;
389 
390   /* Some Variables required in the macro */
391   Mat         A = baij->A;
392   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
393   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
394   MatScalar   *aa=a->a;
395 
396   Mat         B = baij->B;
397   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
398   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
399   MatScalar   *ba=b->a;
400 
401   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
402   int         low,high,t,ridx,cidx,bs2=a->bs2;
403   MatScalar   *ap,*bap;
404 
405   PetscFunctionBegin;
406   for (i=0; i<m; i++) {
407     if (im[i] < 0) continue;
408 #if defined(PETSC_USE_BOPT_g)
409     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
410 #endif
411     if (im[i] >= rstart_orig && im[i] < rend_orig) {
412       row = im[i] - rstart_orig;
413       for (j=0; j<n; j++) {
414         if (in[j] >= cstart_orig && in[j] < cend_orig){
415           col = in[j] - cstart_orig;
416           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
418           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_BOPT_g)
421         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!baij->colmap) {
426               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430             col  = col - 1;
431 #else
432             col = baij->colmap[in[j]/bs] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
435               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
438               B = baij->B;
439               b = (Mat_SeqBAIJ*)(B)->data;
440               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
441               ba=b->a;
442             } else col += in[j]%bs;
443           } else col = in[j];
444           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
445           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
446           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
447         }
448       }
449     } else {
450       if (!baij->donotstash) {
451         if (roworiented) {
452           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453         } else {
454           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
455         }
456       }
457     }
458   }
459   PetscFunctionReturn(0);
460 }
461 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - Inserts or adds values one bs x bs
   block at a time.  v holds m*n blocks laid out row- or column-oriented
   according to baij->roworiented.  Blocks in locally owned block rows go into
   the diagonal (A) or off-diagonal (B) sequential matrices; other block rows
   are stashed for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;   /* scratch block used when a copy is required */
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    /* lazily allocate the bs x bs scratch block, cached on the matrix */
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block rows are ignored by convention */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column lies in the diagonal portion */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          /* off-diagonal portion: translate global block column to local */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* colmap entries are local point-column index + 1, so they must
               be divisible by bs after the -1 shift */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* unseen block column: revert to unassembled form and retry */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
564 
/* Multiplicative hashing: HASH_KEY is the fractional part of the golden
   ratio; HASH(size,key,tmp) maps key into [0,size) using the scratch
   PetscReal tmp to hold the intermediate product. */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - point-entry insertion for the
   hash-table ("HT") storage variant.  Each locally owned (row,col) is mapped
   to its bs x bs block through the hash table baij->ht (open addressing with
   linear probing and wraparound); the matching slot of baij->hd points at the
   block's values.  The sparsity pattern is fixed here -- a key with no hash
   entry is an error.  Off-process rows are stashed for assembly-time
   communication.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;   /* probe statistics */
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;   /* +1 so a key of 0 can mean "empty" */
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe forward, wrapping to the start of the table */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe forward, wrapping to the start of the table */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-process row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
647 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - block insertion for the
   hash-table ("HT") storage variant: each bs x bs block (im[i],in[j]) is
   located by hashing key = row*Nbs+col+1 into baij->ht (linear probing with
   wraparound to the start of the table) and its values are written through
   the corresponding pointer in baij->hd.  The sparsity pattern is fixed; a
   missing hash entry is an error.  Off-process block rows are stashed for
   assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int i,j,ii,jj,row,col;
  int             rstart=baij->rstart ;
  int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;   /* probe statistics */
#endif

  PetscFunctionBegin;

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;   /* +1 so a key of 0 can mean "empty" */
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;   /* advance to the next block of this block row */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
764 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves entries (idxm[i],idxn[j]) into the m x n
   row-major array v.  Only rows owned by this process may be requested.
   Diagonal-block columns are read from baij->A; other columns are translated
   through colmap/garray and read from baij->B, with 0.0 returned for
   locations outside the off-diagonal sparsity pattern.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int        bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: map the global column to local via colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent block column (or stale colmap entry) reads as zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatNorm_MPIBAIJ"
812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813 {
814   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
815   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816   PetscErrorCode ierr;
817   int i,bs2=baij->bs2;
818   PetscReal  sum = 0.0;
819   MatScalar  *v;
820 
821   PetscFunctionBegin;
822   if (baij->size == 1) {
823     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824   } else {
825     if (type == NORM_FROBENIUS) {
826       v = amat->a;
827       for (i=0; i<amat->nz*bs2; i++) {
828 #if defined(PETSC_USE_COMPLEX)
829         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
830 #else
831         sum += (*v)*(*v); v++;
832 #endif
833       }
834       v = bmat->a;
835       for (i=0; i<bmat->nz*bs2; i++) {
836 #if defined(PETSC_USE_COMPLEX)
837         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
838 #else
839         sum += (*v)*(*v); v++;
840 #endif
841       }
842       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
843       *nrm = sqrt(*nrm);
844     } else {
845       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
846     }
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 
852 /*
853   Creates the hash table, and sets the table
854   This table is created only once.
855   If new entried need to be added to the matrix
856   then the hash table has to be destroyed and
857   recreated.
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
862 {
863   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
864   Mat         A = baij->A,B=baij->B;
865   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
866   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
867   PetscErrorCode ierr;
868   int         size,bs2=baij->bs2,rstart=baij->rstart;
869   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
870   int         *HT,key;
871   MatScalar   **HD;
872   PetscReal   tmp;
873 #if defined(PETSC_USE_BOPT_g)
874   int         ct=0,max=0;
875 #endif
876 
877   PetscFunctionBegin;
878   baij->ht_size=(int)(factor*nz);
879   size = baij->ht_size;
880 
881   if (baij->ht) {
882     PetscFunctionReturn(0);
883   }
884 
885   /* Allocate Memory for Hash Table */
886   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
887   baij->ht = (int*)(baij->hd + size);
888   HD       = baij->hd;
889   HT       = baij->ht;
890 
891 
892   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
893 
894 
895   /* Loop Over A */
896   for (i=0; i<a->mbs; i++) {
897     for (j=ai[i]; j<ai[i+1]; j++) {
898       row = i+rstart;
899       col = aj[j]+cstart;
900 
901       key = row*Nbs + col + 1;
902       h1  = HASH(size,key,tmp);
903       for (k=0; k<size; k++){
904         if (!HT[(h1+k)%size]) {
905           HT[(h1+k)%size] = key;
906           HD[(h1+k)%size] = a->a + j*bs2;
907           break;
908 #if defined(PETSC_USE_BOPT_g)
909         } else {
910           ct++;
911 #endif
912         }
913       }
914 #if defined(PETSC_USE_BOPT_g)
915       if (k> max) max = k;
916 #endif
917     }
918   }
919   /* Loop Over B */
920   for (i=0; i<b->mbs; i++) {
921     for (j=bi[i]; j<bi[i+1]; j++) {
922       row = i+rstart;
923       col = garray[bj[j]];
924       key = row*Nbs + col + 1;
925       h1  = HASH(size,key,tmp);
926       for (k=0; k<size; k++){
927         if (!HT[(h1+k)%size]) {
928           HT[(h1+k)%size] = key;
929           HD[(h1+k)%size] = b->a + j*bs2;
930           break;
931 #if defined(PETSC_USE_BOPT_g)
932         } else {
933           ct++;
934 #endif
935         }
936       }
937 #if defined(PETSC_USE_BOPT_g)
938       if (k> max) max = k;
939 #endif
940     }
941   }
942 
943   /* Print Summary */
944 #if defined(PETSC_USE_BOPT_g)
945   for (i=0,j=0; i<size; i++) {
946     if (HT[i]) {j++;}
947   }
948   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
949 #endif
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Starts communicating stashed off-process entries
   (both the point stash and the block stash) to their owning processes.
   A no-op when MAT_IGNORE_OFF_PROC_ENTRIES was set (donotstash).
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  /* BOR of the per-process modes detects a mix: both bits set => conflict */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* point stash is partitioned by point row ranges, block stash by block rows */
  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Receives stashed entries from other processes and
   inserts them, assembles the sequential blocks A and B, handles possible
   disassembly on any process, and (optionally) builds the hash table used by
   the fast _HT MatSetValues paths.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int         i,j,rstart,ncols,n,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each message is a batch of (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: zero iff some process was never assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter context/lvec for MatMult etc. */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if MAT_USE_HASH_TABLE was requested, build the table now and switch the
     SetValues function pointers to the hashed fast paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* workspace used by MatGetRow is invalidated by reassembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Worker for MatView_MPIBAIJ.  For the
   ASCII info formats it prints per-process summaries; otherwise the whole
   matrix is gathered onto process 0 (as a temporary MPIBAIJ with all rows on
   rank 0) and viewed through a singleton viewer.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* the info formats print summaries only and return early */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* nothing to do when the draw context is a null device */
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* blocks are stored column-major: insert one point column at a time */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's local block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatView_MPIBAIJ"
1200 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1201 {
1202   PetscErrorCode ierr;
1203   PetscTruth iascii,isdraw,issocket,isbinary;
1204 
1205   PetscFunctionBegin;
1206   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1207   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1208   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1209   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1210   if (iascii || isdraw || issocket || isbinary) {
1211     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1212   } else {
1213     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1214   }
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage owned by an MPIBAIJ matrix: the
   stashes, the sequential blocks A and B, the colmap/garray lookup data, the
   scatter context, and optional workspaces; finally clears the composed
   function slots installed at creation time.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd also owns the hash-table keys (ht points into the same allocation) */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  /* remove the composed functions so late queries cannot reach freed data */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatMult_MPIBAIJ"
1261 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1262 {
1263   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1264   PetscErrorCode ierr;
1265   int nt;
1266 
1267   PetscFunctionBegin;
1268   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1269   if (nt != A->n) {
1270     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1271   }
1272   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1273   if (nt != A->m) {
1274     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1275   }
1276   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1277   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1278   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1279   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1280   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1286 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1287 {
1288   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1289   PetscErrorCode ierr;
1290 
1291   PetscFunctionBegin;
1292   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1293   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1294   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1295   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1301 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1302 {
1303   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1304   PetscErrorCode ierr;
1305 
1306   PetscFunctionBegin;
1307   /* do nondiagonal part */
1308   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1309   /* send it on its way */
1310   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1311   /* do local part */
1312   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1313   /* receive remote parts: note this assumes the values are not actually */
1314   /* inserted in yy until the next line, which is true for my implementation*/
1315   /* but is not perhaps always true. */
1316   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
/*
   MatMultTransposeAdd_MPIBAIJ - Computes zz = A^T * xx + yy, overlapping the
   reverse scatter of off-diagonal partial sums with the local product.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part: lvec = B^T * xx */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way (ADDed into zz by the reverse scatter) */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part: zz = A_diag^T * xx + yy */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1340 
1341 /*
1342   This only works correctly for square matrices where the subblock A->A is the
1343    diagonal block
1344 */
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1347 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1348 {
1349   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1350   PetscErrorCode ierr;
1351 
1352   PetscFunctionBegin;
1353   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1354   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatScale_MPIBAIJ"
1360 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1361 {
1362   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1367   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned row (point index) as a merged,
   column-sorted list of global column indices and values drawn from the
   diagonal block A and the off-diagonal block B.  Workspace (rowvalues /
   rowindices) is lazily sized for the fattest local block row.  Must be paired
   with MatRestoreRow_MPIBAIJ.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int          bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices carved out of its tail */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* pass NULL for whichever outputs the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* B's columns split around the diagonal block: entries with global
         block column < cstart come first, then all of A, then the rest of B */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1455 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1456 {
1457   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1458 
1459   PetscFunctionBegin;
1460   if (baij->getrowactive == PETSC_FALSE) {
1461     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1462   }
1463   baij->getrowactive = PETSC_FALSE;
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1469 PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1470 {
1471   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1472 
1473   PetscFunctionBegin;
1474   *bs = baij->bs;
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1480 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1481 {
1482   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1483   PetscErrorCode ierr;
1484 
1485   PetscFunctionBegin;
1486   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1487   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1493 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1494 {
1495   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1496   Mat         A = a->A,B = a->B;
1497   PetscErrorCode ierr;
1498   PetscReal   isend[5],irecv[5];
1499 
1500   PetscFunctionBegin;
1501   info->block_size     = (PetscReal)a->bs;
1502   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1503   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1504   isend[3] = info->memory;  isend[4] = info->mallocs;
1505   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1506   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1507   isend[3] += info->memory;  isend[4] += info->mallocs;
1508   if (flag == MAT_LOCAL) {
1509     info->nz_used      = isend[0];
1510     info->nz_allocated = isend[1];
1511     info->nz_unneeded  = isend[2];
1512     info->memory       = isend[3];
1513     info->mallocs      = isend[4];
1514   } else if (flag == MAT_GLOBAL_MAX) {
1515     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1516     info->nz_used      = irecv[0];
1517     info->nz_allocated = irecv[1];
1518     info->nz_unneeded  = irecv[2];
1519     info->memory       = irecv[3];
1520     info->mallocs      = irecv[4];
1521   } else if (flag == MAT_GLOBAL_SUM) {
1522     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1523     info->nz_used      = irecv[0];
1524     info->nz_allocated = irecv[1];
1525     info->nz_unneeded  = irecv[2];
1526     info->memory       = irecv[3];
1527     info->mallocs      = irecv[4];
1528   } else {
1529     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1530   }
1531   info->rows_global       = (PetscReal)A->M;
1532   info->columns_global    = (PetscReal)A->N;
1533   info->rows_local        = (PetscReal)A->m;
1534   info->columns_local     = (PetscReal)A->N;
1535   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1536   info->fill_ratio_needed = 0;
1537   info->factor_mallocs    = 0;
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatSetOption_MPIBAIJ"
/*
   MatSetOption_MPIBAIJ - Applies a MatOption.  Most options are forwarded to
   both sequential blocks; a few only change local flags, and some are
   explicitly ignored or unsupported.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded to both the diagonal and off-diagonal blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip stashing/communication of off-process values during assembly */
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");  /* SETERRQ returns; no fall-through */
  case MAT_USE_HASH_TABLE:
    /* table itself is built lazily at final assembly */
    a->ht_flag = PETSC_TRUE;
    break;
  /* symmetry flags only matter for the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1602 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1603 {
1604   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1605   Mat_SeqBAIJ *Aloc;
1606   Mat         B;
1607   PetscErrorCode ierr;
1608   int M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1609   int         bs=baij->bs,mbs=baij->mbs;
1610   MatScalar   *a;
1611 
1612   PetscFunctionBegin;
1613   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1614   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1615   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1616   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1617 
1618   /* copy over the A part */
1619   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1620   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1621   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1622 
1623   for (i=0; i<mbs; i++) {
1624     rvals[0] = bs*(baij->rstart + i);
1625     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1626     for (j=ai[i]; j<ai[i+1]; j++) {
1627       col = (baij->cstart+aj[j])*bs;
1628       for (k=0; k<bs; k++) {
1629         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1630         col++; a += bs;
1631       }
1632     }
1633   }
1634   /* copy over the B part */
1635   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1636   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1637   for (i=0; i<mbs; i++) {
1638     rvals[0] = bs*(baij->rstart + i);
1639     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1640     for (j=ai[i]; j<ai[i+1]; j++) {
1641       col = baij->garray[aj[j]]*bs;
1642       for (k=0; k<bs; k++) {
1643         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1644         col++; a += bs;
1645       }
1646     }
1647   }
1648   ierr = PetscFree(rvals);CHKERRQ(ierr);
1649   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1650   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1651 
1652   if (matout) {
1653     *matout = B;
1654   } else {
1655     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1656   }
1657   PetscFunctionReturn(0);
1658 }
1659 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
/*
   Computes mat = diag(ll)*mat*diag(rr).  ll must conform to the local row size,
   rr to the local column size; either may be PETSC_NULL to skip that side.
   The scatter of rr (needed to right-scale the off-diagonal block) is started
   first so its communication overlaps the left/diagonal scaling work.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  int s1,s2,s3;  /* s2,s3: local rows/cols of mat; s1: local size of the vector being checked */

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling of the off-diagonal block needs only locally owned ll entries */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1693 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   Zeroes the (global, point-index) rows listed in is.  A process may list rows
   it does not own, so the row indices are first routed to their owning process
   via MPI point-to-point messages; each process then zeroes its local rows.
   If diag is non-null the diagonal entry of every zeroed row is set to *diag.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  int            i,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for proc j; nprocs[2*j+1] = 1 iff any row goes to j */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* owners[] is in block-row units; row indices are point indices, hence *bs */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* starts[] was consumed by the packing loop above; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first point-row owned by this process; received indices are made local relative to it */
  base = owners[rank]*bs;

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: SeqBAIJ can place the diagonal entries directly */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert diagonals one by one */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;  /* convert back to a global point index */
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1845 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1846 {
1847   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1848   MPI_Comm    comm = A->comm;
1849   static int  called = 0;
1850   PetscErrorCode ierr;
1851 
1852   PetscFunctionBegin;
1853   if (!a->rank) {
1854     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1855   }
1856   if (called) {PetscFunctionReturn(0);} else called = 1;
1857   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1858   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1864 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1865 {
1866   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1867   PetscErrorCode ierr;
1868 
1869   PetscFunctionBegin;
1870   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatEqual_MPIBAIJ"
1878 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1879 {
1880   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1881   Mat         a,b,c,d;
1882   PetscTruth  flg;
1883   PetscErrorCode ierr;
1884 
1885   PetscFunctionBegin;
1886   a = matA->A; b = matA->B;
1887   c = matB->A; d = matB->B;
1888 
1889   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1890   if (flg == PETSC_TRUE) {
1891     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1892   }
1893   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1900 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1901 {
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 /* -------------------------------------------------------------------*/
/*
   Function table for the MPIBAIJ matrix type.  Entries are POSITIONAL: each
   slot corresponds to a fixed member of struct _MatOps, so entries must never
   be reordered.  A 0 marks an operation this type does not implement.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       0,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*25*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       0,
/*45*/ MatPrintHelp_MPIBAIJ,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPIBAIJ,
       0,
       MatSetValuesBlocked_MPIBAIJ,
/*60*/ 0,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPIBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPIBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};
2010 
2011 
2012 EXTERN_C_BEGIN
2013 #undef __FUNCT__
2014 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2015 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2016 {
2017   PetscFunctionBegin;
2018   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2019   *iscopy = PETSC_FALSE;
2020   PetscFunctionReturn(0);
2021 }
2022 EXTERN_C_END
2023 
2024 EXTERN_C_BEGIN
2025 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2026 EXTERN_C_END
2027 
2028 EXTERN_C_BEGIN
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2031 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2032 {
2033   Mat_MPIBAIJ  *b;
2034   PetscErrorCode ierr;
2035   int  i;
2036 
2037   PetscFunctionBegin;
2038   B->preallocated = PETSC_TRUE;
2039   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2040 
2041   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2042   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2043   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2044   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2045   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2046   if (d_nnz) {
2047   for (i=0; i<B->m/bs; i++) {
2048       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2049     }
2050   }
2051   if (o_nnz) {
2052     for (i=0; i<B->m/bs; i++) {
2053       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2054     }
2055   }
2056 
2057   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2058   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2059   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2060   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2061 
2062   b = (Mat_MPIBAIJ*)B->data;
2063   b->bs  = bs;
2064   b->bs2 = bs*bs;
2065   b->mbs = B->m/bs;
2066   b->nbs = B->n/bs;
2067   b->Mbs = B->M/bs;
2068   b->Nbs = B->N/bs;
2069 
2070   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2071   b->rowners[0]    = 0;
2072   for (i=2; i<=b->size; i++) {
2073     b->rowners[i] += b->rowners[i-1];
2074   }
2075   b->rstart    = b->rowners[b->rank];
2076   b->rend      = b->rowners[b->rank+1];
2077 
2078   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2079   b->cowners[0] = 0;
2080   for (i=2; i<=b->size; i++) {
2081     b->cowners[i] += b->cowners[i-1];
2082   }
2083   b->cstart    = b->cowners[b->rank];
2084   b->cend      = b->cowners[b->rank+1];
2085 
2086   for (i=0; i<=b->size; i++) {
2087     b->rowners_bs[i] = b->rowners[i]*bs;
2088   }
2089   b->rstart_bs = b->rstart*bs;
2090   b->rend_bs   = b->rend*bs;
2091   b->cstart_bs = b->cstart*bs;
2092   b->cend_bs   = b->cend*bs;
2093 
2094   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2095   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2096   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2097   PetscLogObjectParent(B,b->A);
2098   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2099   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2100   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2101   PetscLogObjectParent(B,b->B);
2102 
2103   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2104 
2105   PetscFunctionReturn(0);
2106 }
2107 EXTERN_C_END
2108 
2109 EXTERN_C_BEGIN
2110 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2111 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2112 EXTERN_C_END
2113 
2114 /*MC
2115    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2116 
2117    Options Database Keys:
2118 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2119 
2120   Level: beginner
2121 
2122 .seealso: MatCreateMPIBAIJ
2123 M*/
2124 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   Constructor for the MPIBAIJ type: allocates and zeroes the type-specific
   data, installs the function table, sets up ownership arrays and the stash,
   initializes all cached state, and composes the type-specific functions
   (store/retrieve values, diagonal block, preallocation, scaling, hash factor)
   that the generic interfaces dispatch to.
*/
PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ  *b;
  PetscErrorCode ierr;
  PetscTruth   flg;

  PetscFunctionBegin;
  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation carved into three arrays: rowners, cowners, rowners_bs */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* -mat_use_hash_table enables hash-based assembly; an optional value > 1.0
     sets the table-size growth factor */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* compose the type-specific entry points queried by the generic interfaces */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2218 
2219 /*MC
2220    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2221 
2222    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2223    and MATMPIBAIJ otherwise.
2224 
2225    Options Database Keys:
2226 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2227 
2228   Level: beginner
2229 
2230 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2231 M*/
2232 
2233 EXTERN_C_BEGIN
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatCreate_BAIJ"
2236 PetscErrorCode MatCreate_BAIJ(Mat A)
2237 {
2238   PetscErrorCode ierr;
2239   int size;
2240 
2241   PetscFunctionBegin;
2242   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2243   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2244   if (size == 1) {
2245     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2246   } else {
2247     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2248   }
2249   PetscFunctionReturn(0);
2250 }
2251 EXTERN_C_END
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2255 /*@C
2256    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2257    (block compressed row).  For good matrix assembly performance
2258    the user should preallocate the matrix storage by setting the parameters
2259    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2260    performance can be increased by more than a factor of 50.
2261 
2262    Collective on Mat
2263 
2264    Input Parameters:
2265 +  A - the matrix
.  bs   - size of block
2267 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2268            submatrix  (same for all local rows)
2269 .  d_nnz - array containing the number of block nonzeros in the various block rows
2270            of the in diagonal portion of the local (possibly different for each block
2271            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2272 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2273            submatrix (same for all local rows).
2274 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2275            off-diagonal portion of the local submatrix (possibly different for
2276            each block row) or PETSC_NULL.
2277 
2278    Output Parameter:
2279 
2280 
2281    Options Database Keys:
2282 .   -mat_no_unroll - uses code that does not unroll the loops in the
2283                      block calculations (much slower)
2284 .   -mat_block_size - size of the blocks to use
2285 
2286    Notes:
2287    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2289 
2290    Storage Information:
2291    For a square global matrix we define each processor's diagonal portion
2292    to be its local rows and the corresponding columns (a square submatrix);
2293    each processor's off-diagonal portion encompasses the remainder of the
2294    local matrix (a rectangular submatrix).
2295 
2296    The user can specify preallocated storage for the diagonal part of
2297    the local submatrix with either d_nz or d_nnz (not both).  Set
2298    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2299    memory allocation.  Likewise, specify preallocated storage for the
2300    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2301 
2302    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2303    the figure below we depict these three local rows and all columns (0-11).
2304 
2305 .vb
2306            0 1 2 3 4 5 6 7 8 9 10 11
2307           -------------------
2308    row 3  |  o o o d d d o o o o o o
2309    row 4  |  o o o d d d o o o o o o
2310    row 5  |  o o o d d d o o o o o o
2311           -------------------
2312 .ve
2313 
2314    Thus, any entries in the d locations are stored in the d (diagonal)
2315    submatrix, and any entries in the o locations are stored in the
2316    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2317    stored simply in the MATSEQBAIJ format for compressed row storage.
2318 
2319    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2320    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2321    In general, for PDE problems in which most nonzeros are near the diagonal,
2322    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2323    or you will get TERRIBLE performance; see the users' manual chapter on
2324    matrices.
2325 
2326    Level: intermediate
2327 
2328 .keywords: matrix, block, aij, compressed row, sparse, parallel
2329 
2330 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2331 @*/
2332 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2333 {
2334   PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2335 
2336   PetscFunctionBegin;
2337   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2338   if (f) {
2339     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2340   }
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatCreateMPIBAIJ"
2346 /*@C
2347    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2348    (block compressed row).  For good matrix assembly performance
2349    the user should preallocate the matrix storage by setting the parameters
2350    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2351    performance can be increased by more than a factor of 50.
2352 
2353    Collective on MPI_Comm
2354 
2355    Input Parameters:
2356 +  comm - MPI communicator
.  bs   - size of block
2358 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2359            This value should be the same as the local size used in creating the
2360            y vector for the matrix-vector product y = Ax.
2361 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2362            This value should be the same as the local size used in creating the
2363            x vector for the matrix-vector product y = Ax.
2364 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2365 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2366 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2367            submatrix  (same for all local rows)
2368 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2369            of the in diagonal portion of the local (possibly different for each block
2370            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2371 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2372            submatrix (same for all local rows).
2373 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2374            off-diagonal portion of the local submatrix (possibly different for
2375            each block row) or PETSC_NULL.
2376 
2377    Output Parameter:
2378 .  A - the matrix
2379 
2380    Options Database Keys:
2381 .   -mat_no_unroll - uses code that does not unroll the loops in the
2382                      block calculations (much slower)
2383 .   -mat_block_size - size of the blocks to use
2384 
2385    Notes:
   A nonzero block is any block that has 1 or more nonzeros in it
2387 
2388    The user MUST specify either the local or global matrix dimensions
2389    (possibly both).
2390 
2391    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2393 
2394    Storage Information:
2395    For a square global matrix we define each processor's diagonal portion
2396    to be its local rows and the corresponding columns (a square submatrix);
2397    each processor's off-diagonal portion encompasses the remainder of the
2398    local matrix (a rectangular submatrix).
2399 
2400    The user can specify preallocated storage for the diagonal part of
2401    the local submatrix with either d_nz or d_nnz (not both).  Set
2402    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2403    memory allocation.  Likewise, specify preallocated storage for the
2404    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2405 
2406    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2407    the figure below we depict these three local rows and all columns (0-11).
2408 
2409 .vb
2410            0 1 2 3 4 5 6 7 8 9 10 11
2411           -------------------
2412    row 3  |  o o o d d d o o o o o o
2413    row 4  |  o o o d d d o o o o o o
2414    row 5  |  o o o d d d o o o o o o
2415           -------------------
2416 .ve
2417 
2418    Thus, any entries in the d locations are stored in the d (diagonal)
2419    submatrix, and any entries in the o locations are stored in the
2420    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2421    stored simply in the MATSEQBAIJ format for compressed row storage.
2422 
2423    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2424    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2425    In general, for PDE problems in which most nonzeros are near the diagonal,
2426    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2427    or you will get TERRIBLE performance; see the users' manual chapter on
2428    matrices.
2429 
2430    Level: intermediate
2431 
2432 .keywords: matrix, block, aij, compressed row, sparse, parallel
2433 
2434 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2435 @*/
2436 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2437 {
2438   PetscErrorCode ierr;
2439   int size;
2440 
2441   PetscFunctionBegin;
2442   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2443   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2444   if (size > 1) {
2445     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2446     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2447   } else {
2448     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2449     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates an MPIBAIJ matrix: builds a new matrix with
   the same parallel layout, block size, and communication structures as matin.
   The numerical values of the diagonal (A) and off-diagonal (B) pieces are
   duplicated according to cpvalues, which is forwarded to MatDuplicate() on
   each piece.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  int len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  /* copy the entire function table so any per-object overrides on matin survive */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size and blocked-dimension information */
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* copy ownership ranges; the *_bs fields are presumably the same ranges in
     scalar (unblocked) units -- TODO confirm against Mat_MPIBAIJ definition */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work space is not copied; it is rebuilt on demand */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff: only the tuning parameters (flag and factor) are
     inherited; the table itself and its counters start empty */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  /* fresh stashes: scalar stash (block size 1) and block stash (block size bs) */
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* copy the global block-column indices of the off-diagonal part, if present */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  /* duplicate the communication machinery used by matrix-vector products */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);

  /* duplicate the diagonal (A) and off-diagonal (B) sequential pieces */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
2541 
2542 #include "petscsys.h"
2543 
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatLoad_MPIBAIJ"
2546 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2547 {
2548   Mat          A;
2549   PetscErrorCode ierr;
2550   int          i,nz,j,rstart,rend,fd;
2551   PetscScalar  *vals,*buf;
2552   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2553   MPI_Status   status;
2554   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2555   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2556   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2557   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2558   int          dcount,kmax,k,nzcount,tmp;
2559 
2560   PetscFunctionBegin;
2561   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2562 
2563   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2564   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2565   if (!rank) {
2566     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2567     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2568     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2569     if (header[3] < 0) {
2570       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2571     }
2572   }
2573 
2574   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2575   M = header[1]; N = header[2];
2576 
2577   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2578 
2579   /*
2580      This code adds extra rows to make sure the number of rows is
2581      divisible by the blocksize
2582   */
2583   Mbs        = M/bs;
2584   extra_rows = bs - M + bs*(Mbs);
2585   if (extra_rows == bs) extra_rows = 0;
2586   else                  Mbs++;
2587   if (extra_rows &&!rank) {
2588     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2589   }
2590 
2591   /* determine ownership of all rows */
2592   mbs        = Mbs/size + ((Mbs % size) > rank);
2593   m          = mbs*bs;
2594   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2595   browners   = rowners + size + 1;
2596   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2597   rowners[0] = 0;
2598   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2599   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2600   rstart = rowners[rank];
2601   rend   = rowners[rank+1];
2602 
2603   /* distribute row lengths to all processors */
2604   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2605   if (!rank) {
2606     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2607     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2608     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2609     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2610     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2611     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2612     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2613   } else {
2614     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2615   }
2616 
2617   if (!rank) {
2618     /* calculate the number of nonzeros on each processor */
2619     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2620     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2621     for (i=0; i<size; i++) {
2622       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2623         procsnz[i] += rowlengths[j];
2624       }
2625     }
2626     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2627 
2628     /* determine max buffer needed and allocate it */
2629     maxnz = 0;
2630     for (i=0; i<size; i++) {
2631       maxnz = PetscMax(maxnz,procsnz[i]);
2632     }
2633     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2634 
2635     /* read in my part of the matrix column indices  */
2636     nz     = procsnz[0];
2637     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2638     mycols = ibuf;
2639     if (size == 1)  nz -= extra_rows;
2640     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2641     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2642 
2643     /* read in every ones (except the last) and ship off */
2644     for (i=1; i<size-1; i++) {
2645       nz   = procsnz[i];
2646       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2647       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2648     }
2649     /* read in the stuff for the last proc */
2650     if (size != 1) {
2651       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2652       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2653       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2654       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2655     }
2656     ierr = PetscFree(cols);CHKERRQ(ierr);
2657   } else {
2658     /* determine buffer space needed for message */
2659     nz = 0;
2660     for (i=0; i<m; i++) {
2661       nz += locrowlens[i];
2662     }
2663     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2664     mycols = ibuf;
2665     /* receive message of column indices*/
2666     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2667     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2668     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2669   }
2670 
2671   /* loop over local rows, determining number of off diagonal entries */
2672   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2673   odlens   = dlens + (rend-rstart);
2674   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2675   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2676   masked1  = mask    + Mbs;
2677   masked2  = masked1 + Mbs;
2678   rowcount = 0; nzcount = 0;
2679   for (i=0; i<mbs; i++) {
2680     dcount  = 0;
2681     odcount = 0;
2682     for (j=0; j<bs; j++) {
2683       kmax = locrowlens[rowcount];
2684       for (k=0; k<kmax; k++) {
2685         tmp = mycols[nzcount++]/bs;
2686         if (!mask[tmp]) {
2687           mask[tmp] = 1;
2688           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2689           else masked1[dcount++] = tmp;
2690         }
2691       }
2692       rowcount++;
2693     }
2694 
2695     dlens[i]  = dcount;
2696     odlens[i] = odcount;
2697 
2698     /* zero out the mask elements we set */
2699     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2700     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2701   }
2702 
2703   /* create our matrix */
2704   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2705   ierr = MatSetType(A,type);CHKERRQ(ierr)
2706   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2707 
2708   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2709   MatSetOption(A,MAT_COLUMNS_SORTED);
2710 
2711   if (!rank) {
2712     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2713     /* read in my part of the matrix numerical values  */
2714     nz = procsnz[0];
2715     vals = buf;
2716     mycols = ibuf;
2717     if (size == 1)  nz -= extra_rows;
2718     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2719     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2720 
2721     /* insert into matrix */
2722     jj      = rstart*bs;
2723     for (i=0; i<m; i++) {
2724       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2725       mycols += locrowlens[i];
2726       vals   += locrowlens[i];
2727       jj++;
2728     }
2729     /* read in other processors (except the last one) and ship out */
2730     for (i=1; i<size-1; i++) {
2731       nz   = procsnz[i];
2732       vals = buf;
2733       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2734       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2735     }
2736     /* the last proc */
2737     if (size != 1){
2738       nz   = procsnz[i] - extra_rows;
2739       vals = buf;
2740       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2741       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2742       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2743     }
2744     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2745   } else {
2746     /* receive numeric values */
2747     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2748 
2749     /* receive message of values*/
2750     vals   = buf;
2751     mycols = ibuf;
2752     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2753     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2754     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2755 
2756     /* insert into matrix */
2757     jj      = rstart*bs;
2758     for (i=0; i<m; i++) {
2759       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2760       mycols += locrowlens[i];
2761       vals   += locrowlens[i];
2762       jj++;
2763     }
2764   }
2765   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2766   ierr = PetscFree(buf);CHKERRQ(ierr);
2767   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2768   ierr = PetscFree(rowners);CHKERRQ(ierr);
2769   ierr = PetscFree(dlens);CHKERRQ(ierr);
2770   ierr = PetscFree(mask);CHKERRQ(ierr);
2771   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2772   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2773 
2774   *newmat = A;
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2780 /*@
2781    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2782 
2783    Input Parameters:
2784 .  mat  - the matrix
2785 .  fact - factor
2786 
2787    Collective on Mat
2788 
2789    Level: advanced
2790 
2791   Notes:
2792    This can also be set by the command line option: -mat_use_hash_table fact
2793 
2794 .keywords: matrix, hashtable, factor, HT
2795 
2796 .seealso: MatSetOption()
2797 @*/
2798 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2799 {
2800   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2801 
2802   PetscFunctionBegin;
2803   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2804   if (f) {
2805     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2806   }
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #undef __FUNCT__
2811 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2812 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2813 {
2814   Mat_MPIBAIJ *baij;
2815 
2816   PetscFunctionBegin;
2817   baij = (Mat_MPIBAIJ*)mat->data;
2818   baij->ht_fact = fact;
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 #undef __FUNCT__
2823 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2824 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2825 {
2826   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2827   PetscFunctionBegin;
2828   *Ad     = a->A;
2829   *Ao     = a->B;
2830   *colmap = a->garray;
2831   PetscFunctionReturn(0);
2832 }
2833