xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 25616d81f464dead70372e35c2cc7e6db2a8f99c)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
16 /*  UGLY, ugly, ugly
17    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
18    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
19    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
20    converts the entries into single precision and then calls ..._MatScalar() to put them
21    into the single precision data structures.
22 */
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   int i;
44   PetscScalar  *va,*vb;
45   Vec          vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 /*
98      Local utility routine that creates a mapping from the global column
99    number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
101    length of colmap equals the global matrix length.
102 */
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  /* nbs = number of block columns present in the off-diagonal part; bs = block size */
  int         nbs = B->nbs,i,bs=B->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* Hash-table variant: both keys and values are stored shifted by +1 so that
     0 can serve as the "not present" return of PetscTableFind().
     NOTE(review): table sized with baij->nbs — presumably the same count as
     B->nbs used in the loop below; confirm. */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense-array variant: one int per global block column (length Nbs, hence
     non-scalable).  Entries are the local column offset + 1; 0 means absent. */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
126 
127 #define CHUNKSIZE  10
128 
/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - open-coded insertion of a
   single scalar into the diagonal (A) block.  Relies on local variables set up
   by the caller: rp,ap,bap,aa,ai,aj,ailen,aimax,a,bs,bs2,brow,bcol,ridx,cidx,
   low,high,t,_i,ii,N,nrow,rmax,ierr.  If the row is full and a->nonew permits,
   the storage is enlarged by CHUNKSIZE block entries.

   Fix: the zero-fill of the freshly allocated chunk used sizeof(PetscScalar);
   the array holds MatScalar entries, which are smaller when
   PETSC_USE_MAT_SINGLE is defined, so the PetscMemzero overran the chunk and
   could clobber the column/row index arrays packed behind it.  It now uses
   sizeof(MatScalar), matching MatSetValues_SeqBAIJ_B_Private below.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
205 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - open-coded insertion of a
   single scalar into the off-diagonal (B) block; the mirror image of the A
   variant above.  Relies on local variables set up by the calling routine:
   rp,ap,bap,ba,bi,bj,bilen,bimax,b,bs,bs2,brow,bcol,ridx,cidx,low,high,t,_i,
   ii,N,nrow,rmax,ierr.  A binary search narrows the row, a linear scan finds
   the exact block column; when the row is full and b->nonew permits, the
   storage is enlarged by CHUNKSIZE block entries (index and value arrays are
   carved out of one allocation, hence b->singlemalloc).
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288   PetscErrorCode ierr;
289   int  i,N = m*n;
290   MatScalar   *vsingle;
291 
292   PetscFunctionBegin;
293   if (N > b->setvalueslen) {
294     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
295     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
296     b->setvalueslen  = N;
297   }
298   vsingle = b->setvaluescopy;
299 
300   for (i=0; i<N; i++) {
301     vsingle[i] = v[i];
302   }
303   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
310 {
311   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
312   PetscErrorCode ierr;
313   int i,N = m*n*b->bs2;
314   MatScalar   *vsingle;
315 
316   PetscFunctionBegin;
317   if (N > b->setvalueslen) {
318     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
319     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
320     b->setvalueslen  = N;
321   }
322   vsingle = b->setvaluescopy;
323   for (i=0; i<N; i++) {
324     vsingle[i] = v[i];
325   }
326   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
333 {
334   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
335   PetscErrorCode ierr;
336   int i,N = m*n;
337   MatScalar   *vsingle;
338 
339   PetscFunctionBegin;
340   if (N > b->setvalueslen) {
341     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
342     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
343     b->setvalueslen  = N;
344   }
345   vsingle = b->setvaluescopy;
346   for (i=0; i<N; i++) {
347     vsingle[i] = v[i];
348   }
349   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
356 {
357   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
358   PetscErrorCode ierr;
359   int i,N = m*n*b->bs2;
360   MatScalar   *vsingle;
361 
362   PetscFunctionBegin;
363   if (N > b->setvalueslen) {
364     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
365     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
366     b->setvalueslen  = N;
367   }
368   vsingle = b->setvaluescopy;
369   for (i=0; i<N; i++) {
370     vsingle[i] = v[i];
371   }
372   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 #endif
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
380 {
381   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
382   MatScalar   value;
383   PetscTruth  roworiented = baij->roworiented;
384   PetscErrorCode ierr;
385   int i,j,row,col;
386   int         rstart_orig=baij->rstart_bs;
387   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
388   int         cend_orig=baij->cend_bs,bs=baij->bs;
389 
390   /* Some Variables required in the macro */
391   Mat         A = baij->A;
392   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
393   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
394   MatScalar   *aa=a->a;
395 
396   Mat         B = baij->B;
397   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
398   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
399   MatScalar   *ba=b->a;
400 
401   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
402   int         low,high,t,ridx,cidx,bs2=a->bs2;
403   MatScalar   *ap,*bap;
404 
405   PetscFunctionBegin;
406   for (i=0; i<m; i++) {
407     if (im[i] < 0) continue;
408 #if defined(PETSC_USE_BOPT_g)
409     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
410 #endif
411     if (im[i] >= rstart_orig && im[i] < rend_orig) {
412       row = im[i] - rstart_orig;
413       for (j=0; j<n; j++) {
414         if (in[j] >= cstart_orig && in[j] < cend_orig){
415           col = in[j] - cstart_orig;
416           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
418           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_BOPT_g)
421         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!baij->colmap) {
426               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430             col  = col - 1;
431 #else
432             col = baij->colmap[in[j]/bs] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
435               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
438               B = baij->B;
439               b = (Mat_SeqBAIJ*)(B)->data;
440               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
441               ba=b->a;
442             } else col += in[j]%bs;
443           } else col = in[j];
444           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
445           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
446           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
447         }
448       }
449     } else {
450       if (!baij->donotstash) {
451         if (roworiented) {
452           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453         } else {
454           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
455         }
456       }
457     }
458   }
459   PetscFunctionReturn(0);
460 }
461 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - Inserts or adds dense bs x bs blocks
   of values into the parallel BAIJ matrix.  Locally owned block rows go into
   baij->A (diagonal part) or baij->B (off-diagonal part, with the global block
   column translated through the colmap); block rows owned by other processes
   are stashed for communication at assembly time.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the persistent bs2-sized scratch block used to repack one
     block of v into column-major block layout */
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block-row index means skip */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        /* (the block is already contiguous in v, so point straight into it) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into the contiguous scratch buffer */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block lies in the diagonal part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          /* off-diagonal part: translate global block column through colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not yet present: revert to unassembled form so
                 new nonzero blocks can be inserted */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
564 
565 #define HASH_KEY 0.6180339887
566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
567 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
568 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - Scalar insertion using the hash-table
   fast path: the (block row, block col) pair is hashed to locate the address
   of the bs x bs block (baij->hd), so only existing nonzero blocks can be
   updated — a missing entry is an error.  Non-local rows are stashed.
   Under PETSC_USE_BOPT_g, probe counts are accumulated for diagnostics.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        /* key encodes the global (block row, block col) pair, shifted +1 so 0
           can mean an empty slot */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probe forward from h1, wrapping once; error if not found */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* same probe without the diagnostic counters */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
647 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - Blocked insertion using the
   hash-table fast path: each global (block row, block col) pair is hashed to
   locate the address of the stored bs x bs block (baij->hd); only existing
   nonzero blocks can be updated — a missing entry is an error.  Non-local
   block rows are stashed for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  int i,j,ii,jj,row,col;
  int             rstart=baij->rstart ;
  int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    /* v_t walks v in bs2 strides; used only by the row-oriented branch below
       — NOTE(review): presumably matches the layout MatStash expects; confirm */
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        /* key encodes the global (block row, block col), shifted +1 so 0 can
           mean an empty slot */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probe forward from h1, wrapping once; error if not found */
       if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* same probe without the diagnostic counters */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx]; /* start of the stored bs x bs block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* copy/accumulate: jj strides by bs to transpose row-major input
             into the column-major stored block */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          /* column-oriented input is already in stored order */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
764 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves individual scalar entries from the locally
   owned rows of the parallel BAIJ matrix.  Diagonal-block columns are read
   from baij->A; other columns are translated through the colmap and read from
   baij->B, with entries absent from the off-diagonal part returned as 0.0.
   Rows owned by other processes raise an error (only local values supported).
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int        bs=baij->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: build the colmap on first use */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --; /* colmap values are stored shifted by +1 (0 == absent) */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent (or stale colmap entry): the value is a structural zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatNorm_MPIBAIJ"
812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813 {
814   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
815   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816   PetscErrorCode ierr;
817   int i,bs2=baij->bs2;
818   PetscReal  sum = 0.0;
819   MatScalar  *v;
820 
821   PetscFunctionBegin;
822   if (baij->size == 1) {
823     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824   } else {
825     if (type == NORM_FROBENIUS) {
826       v = amat->a;
827       for (i=0; i<amat->nz*bs2; i++) {
828 #if defined(PETSC_USE_COMPLEX)
829         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
830 #else
831         sum += (*v)*(*v); v++;
832 #endif
833       }
834       v = bmat->a;
835       for (i=0; i<bmat->nz*bs2; i++) {
836 #if defined(PETSC_USE_COMPLEX)
837         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
838 #else
839         sum += (*v)*(*v); v++;
840 #endif
841       }
842       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
843       *nrm = sqrt(*nrm);
844     } else {
845       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
846     }
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 
852 /*
853   Creates the hash table, and sets the table
854   This table is created only once.
855   If new entried need to be added to the matrix
856   then the hash table has to be destroyed and
857   recreated.
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
861 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
862 {
863   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
864   Mat         A = baij->A,B=baij->B;
865   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
866   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
867   PetscErrorCode ierr;
868   int         size,bs2=baij->bs2,rstart=baij->rstart;
869   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
870   int         *HT,key;
871   MatScalar   **HD;
872   PetscReal   tmp;
873 #if defined(PETSC_USE_BOPT_g)
874   int         ct=0,max=0;
875 #endif
876 
877   PetscFunctionBegin;
878   baij->ht_size=(int)(factor*nz);
879   size = baij->ht_size;
880 
881   if (baij->ht) {
882     PetscFunctionReturn(0);
883   }
884 
885   /* Allocate Memory for Hash Table */
886   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
887   baij->ht = (int*)(baij->hd + size);
888   HD       = baij->hd;
889   HT       = baij->ht;
890 
891 
892   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
893 
894 
895   /* Loop Over A */
896   for (i=0; i<a->mbs; i++) {
897     for (j=ai[i]; j<ai[i+1]; j++) {
898       row = i+rstart;
899       col = aj[j]+cstart;
900 
901       key = row*Nbs + col + 1;
902       h1  = HASH(size,key,tmp);
903       for (k=0; k<size; k++){
904         if (!HT[(h1+k)%size]) {
905           HT[(h1+k)%size] = key;
906           HD[(h1+k)%size] = a->a + j*bs2;
907           break;
908 #if defined(PETSC_USE_BOPT_g)
909         } else {
910           ct++;
911 #endif
912         }
913       }
914 #if defined(PETSC_USE_BOPT_g)
915       if (k> max) max = k;
916 #endif
917     }
918   }
919   /* Loop Over B */
920   for (i=0; i<b->mbs; i++) {
921     for (j=bi[i]; j<bi[i+1]; j++) {
922       row = i+rstart;
923       col = garray[bj[j]];
924       key = row*Nbs + col + 1;
925       h1  = HASH(size,key,tmp);
926       for (k=0; k<size; k++){
927         if (!HT[(h1+k)%size]) {
928           HT[(h1+k)%size] = key;
929           HD[(h1+k)%size] = b->a + j*bs2;
930           break;
931 #if defined(PETSC_USE_BOPT_g)
932         } else {
933           ct++;
934 #endif
935         }
936       }
937 #if defined(PETSC_USE_BOPT_g)
938       if (k> max) max = k;
939 #endif
940     }
941   }
942 
943   /* Print Summary */
944 #if defined(PETSC_USE_BOPT_g)
945   for (i=0,j=0; i<size; i++) {
946     if (HT[i]) {j++;}
947   }
948   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
949 #endif
950   PetscFunctionReturn(0);
951 }
952 
953 #undef __FUNCT__
954 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
955 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
956 {
957   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
958   PetscErrorCode ierr;
959   int nstash,reallocs;
960   InsertMode  addv;
961 
962   PetscFunctionBegin;
963   if (baij->donotstash) {
964     PetscFunctionReturn(0);
965   }
966 
967   /* make sure all processors are either in INSERTMODE or ADDMODE */
968   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
969   if (addv == (ADD_VALUES|INSERT_VALUES)) {
970     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
971   }
972   mat->insertmode = addv; /* in case this processor had no cache */
973 
974   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
975   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
976   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
977   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
978   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
979   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
980   PetscFunctionReturn(0);
981 }
982 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes parallel assembly: drains the point and block stashes received
   from other processes (inserting with the mode agreed upon in
   MatAssemblyBegin), assembles the sequential blocks A and B, handles
   disassembly coordination across processes, and optionally builds the
   MatSetValues hash table.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  int         i,j,rstart,ncols,n,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;             /* saved row-orientation flags */
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* receive and insert the point-stash messages */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block occupies bs2 consecutive values */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly also builds the scatter context for B's columns */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset hash-table probe statistics gathered during SetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, switch SetValues to the fast hash-table implementations */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the scratch space used by MatGetRow */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
1082 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Views the parallel BAIJ matrix.  ASCII info formats print per-process
   statistics and return early; otherwise the full matrix is either viewed
   directly (one process) or gathered onto the first process via a temporary
   MPIBAIJ matrix and viewed through a singleton viewer.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  int bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero/memory statistics; no matrix entries printed */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* nothing to draw in a null window; bail out early */
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global point rows of one block row */
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        /* insert the block one column at a time (values stored column-major) */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's compressed block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatView_MPIBAIJ"
1200 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1201 {
1202   PetscErrorCode ierr;
1203   PetscTruth iascii,isdraw,issocket,isbinary;
1204 
1205   PetscFunctionBegin;
1206   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1207   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1208   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1209   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1210   if (iascii || isdraw || issocket || isbinary) {
1211     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1212   } else {
1213     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1214   }
1215   PetscFunctionReturn(0);
1216 }
1217 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   Destroys the parallel BAIJ matrix: frees both stashes, the sequential
   blocks A and B, the column map, the ghost-column array, the scatter
   context and its work vector, and all scratch buffers, then the
   Mat_MPIBAIJ structure itself.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd and ht share a single allocation; freeing hd releases both */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatMult_MPIBAIJ"
1254 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1255 {
1256   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258   int nt;
1259 
1260   PetscFunctionBegin;
1261   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1262   if (nt != A->n) {
1263     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1264   }
1265   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1266   if (nt != A->m) {
1267     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1268   }
1269   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1270   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1271   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1272   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1273   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1279 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1280 {
1281   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1282   PetscErrorCode ierr;
1283 
1284   PetscFunctionBegin;
1285   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1286   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1287   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1288   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1294 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1295 {
1296   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1297   PetscErrorCode ierr;
1298 
1299   PetscFunctionBegin;
1300   /* do nondiagonal part */
1301   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1302   /* send it on its way */
1303   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1304   /* do local part */
1305   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1306   /* receive remote parts: note this assumes the values are not actually */
1307   /* inserted in yy until the next line, which is true for my implementation*/
1308   /* but is not perhaps always true. */
1309   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1315 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1316 {
1317   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
1321   /* do nondiagonal part */
1322   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1323   /* send it on its way */
1324   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1325   /* do local part */
1326   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1327   /* receive remote parts: note this assumes the values are not actually */
1328   /* inserted in yy until the next line, which is true for my implementation*/
1329   /* but is not perhaps always true. */
1330   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 /*
1335   This only works correctly for square matrices where the subblock A->A is the
1336    diagonal block
1337 */
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1340 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1341 {
1342   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1347   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatScale_MPIBAIJ"
1353 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1354 {
1355   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1360   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   Returns one locally owned row (point indexing) with globally numbered,
   increasing column indices.  The row is the merge of the matching rows of
   the diagonal block A and the off-diagonal block B: B's columns left of the
   diagonal range come first, then A's columns, then B's remaining columns.
   Scratch space sized for the longest local row is allocated lazily and
   reused until MatAssemblyEnd frees it.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int          bs = mat->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* request only what the caller asked for from A and B */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B columns whose global block column precedes the
         diagonal range; those go before A's columns in the merged row */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values not requested: find imark while translating indices */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1448 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1449 {
1450   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1451 
1452   PetscFunctionBegin;
1453   if (baij->getrowactive == PETSC_FALSE) {
1454     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1455   }
1456   baij->getrowactive = PETSC_FALSE;
1457   PetscFunctionReturn(0);
1458 }
1459 
1460 #undef __FUNCT__
1461 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1462 PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1463 {
1464   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1465 
1466   PetscFunctionBegin;
1467   *bs = baij->bs;
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1473 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1474 {
1475   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1480   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 #undef __FUNCT__
1485 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1486 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1487 {
1488   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1489   Mat         A = a->A,B = a->B;
1490   PetscErrorCode ierr;
1491   PetscReal   isend[5],irecv[5];
1492 
1493   PetscFunctionBegin;
1494   info->block_size     = (PetscReal)a->bs;
1495   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1496   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1497   isend[3] = info->memory;  isend[4] = info->mallocs;
1498   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1499   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1500   isend[3] += info->memory;  isend[4] += info->mallocs;
1501   if (flag == MAT_LOCAL) {
1502     info->nz_used      = isend[0];
1503     info->nz_allocated = isend[1];
1504     info->nz_unneeded  = isend[2];
1505     info->memory       = isend[3];
1506     info->mallocs      = isend[4];
1507   } else if (flag == MAT_GLOBAL_MAX) {
1508     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1509     info->nz_used      = irecv[0];
1510     info->nz_allocated = irecv[1];
1511     info->nz_unneeded  = irecv[2];
1512     info->memory       = irecv[3];
1513     info->mallocs      = irecv[4];
1514   } else if (flag == MAT_GLOBAL_SUM) {
1515     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1516     info->nz_used      = irecv[0];
1517     info->nz_allocated = irecv[1];
1518     info->nz_unneeded  = irecv[2];
1519     info->memory       = irecv[3];
1520     info->mallocs      = irecv[4];
1521   } else {
1522     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1523   }
1524   info->rows_global       = (PetscReal)A->M;
1525   info->columns_global    = (PetscReal)A->N;
1526   info->rows_local        = (PetscReal)A->m;
1527   info->columns_local     = (PetscReal)A->N;
1528   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1529   info->fill_ratio_needed = 0;
1530   info->factor_mallocs    = 0;
1531   PetscFunctionReturn(0);
1532 }
1533 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
/*
   Applies a MatOption to the parallel matrix.  Most options are forwarded
   to both sequential blocks A and B; orientation options also update the
   parallel-level flag, symmetry options go to A only, and a few options
   are parallel-level state (stashing, hash table) or unsupported.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options handled entirely by the sequential blocks */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* orientation is tracked here too, for the parallel SetValues paths */
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip stashing: off-process entries will be dropped in SetValues */
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    /* hash table is built lazily at the next final assembly */
    a->ht_flag = PETSC_TRUE;
    break;
  /* symmetry information is recorded on the diagonal block only */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1595 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1596 {
1597   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1598   Mat_SeqBAIJ *Aloc;
1599   Mat         B;
1600   PetscErrorCode ierr;
1601   int M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1602   int         bs=baij->bs,mbs=baij->mbs;
1603   MatScalar   *a;
1604 
1605   PetscFunctionBegin;
1606   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1607   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1608   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1609   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1610 
1611   /* copy over the A part */
1612   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1613   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1614   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1615 
1616   for (i=0; i<mbs; i++) {
1617     rvals[0] = bs*(baij->rstart + i);
1618     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1619     for (j=ai[i]; j<ai[i+1]; j++) {
1620       col = (baij->cstart+aj[j])*bs;
1621       for (k=0; k<bs; k++) {
1622         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1623         col++; a += bs;
1624       }
1625     }
1626   }
1627   /* copy over the B part */
1628   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1629   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1630   for (i=0; i<mbs; i++) {
1631     rvals[0] = bs*(baij->rstart + i);
1632     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1633     for (j=ai[i]; j<ai[i+1]; j++) {
1634       col = baij->garray[aj[j]]*bs;
1635       for (k=0; k<bs; k++) {
1636         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1637         col++; a += bs;
1638       }
1639     }
1640   }
1641   ierr = PetscFree(rvals);CHKERRQ(ierr);
1642   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1644 
1645   if (matout) {
1646     *matout = B;
1647   } else {
1648     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1649   }
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1655 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1656 {
1657   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1658   Mat         a = baij->A,b = baij->B;
1659   PetscErrorCode ierr;
1660   int s1,s2,s3;
1661 
1662   PetscFunctionBegin;
1663   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1664   if (rr) {
1665     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1666     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1667     /* Overlap communication with computation. */
1668     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1669   }
1670   if (ll) {
1671     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1672     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1673     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1674   }
1675   /* scale  the diagonal block */
1676   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1677 
1678   if (rr) {
1679     /* Do a scatter end and then right scale the off-diagonal block */
1680     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1681     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1682   }
1683 
1684   PetscFunctionReturn(0);
1685 }
1686 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the global (point) rows listed in is; if diag
   is non-null the diagonal entry of each zeroed row is set to *diag.

   The row indices in is may refer to rows owned by other processes, so the
   indices are first routed to their owners using nonblocking MPI sends and
   receives, after which each process zeros the rows it owns.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  int            i,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = number of rows destined for proc j;
     nprocs[2*j+1] = 1 iff a message will be sent to proc j */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* owners[] holds block-row ownership ranges; scale by bs for point rows */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack each row index into the slot for its owning process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (the packing loop above advanced it) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first owned point row on this process (global numbering) */
  base = owners[rank]*bs;

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* lrows[] collects the local (0-based) row numbers to be zeroed here */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* fast path: the sequential code sets the diagonal entries itself */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: set diagonal entries via MatSetValues(),
       which requires the matrix to allow new nonzero locations */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1835 
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1838 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1839 {
1840   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1841   MPI_Comm    comm = A->comm;
1842   static int  called = 0;
1843   PetscErrorCode ierr;
1844 
1845   PetscFunctionBegin;
1846   if (!a->rank) {
1847     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1848   }
1849   if (called) {PetscFunctionReturn(0);} else called = 1;
1850   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1851   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1857 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1858 {
1859   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1860   PetscErrorCode ierr;
1861 
1862   PetscFunctionBegin;
1863   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatEqual_MPIBAIJ"
1871 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1872 {
1873   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1874   Mat         a,b,c,d;
1875   PetscTruth  flg;
1876   PetscErrorCode ierr;
1877 
1878   PetscFunctionBegin;
1879   a = matA->A; b = matA->B;
1880   c = matB->A; d = matB->B;
1881 
1882   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1883   if (flg == PETSC_TRUE) {
1884     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1885   }
1886   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
/*
   MatSetUpPreallocation_MPIBAIJ - Fallback used when the caller never invoked
   MatMPIBAIJSetPreallocation(); applies default preallocation with block size 1.
*/
PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1901 
1902 /* -------------------------------------------------------------------*/
1903 static struct _MatOps MatOps_Values = {
1904        MatSetValues_MPIBAIJ,
1905        MatGetRow_MPIBAIJ,
1906        MatRestoreRow_MPIBAIJ,
1907        MatMult_MPIBAIJ,
1908 /* 4*/ MatMultAdd_MPIBAIJ,
1909        MatMultTranspose_MPIBAIJ,
1910        MatMultTransposeAdd_MPIBAIJ,
1911        0,
1912        0,
1913        0,
1914 /*10*/ 0,
1915        0,
1916        0,
1917        0,
1918        MatTranspose_MPIBAIJ,
1919 /*15*/ MatGetInfo_MPIBAIJ,
1920        MatEqual_MPIBAIJ,
1921        MatGetDiagonal_MPIBAIJ,
1922        MatDiagonalScale_MPIBAIJ,
1923        MatNorm_MPIBAIJ,
1924 /*20*/ MatAssemblyBegin_MPIBAIJ,
1925        MatAssemblyEnd_MPIBAIJ,
1926        0,
1927        MatSetOption_MPIBAIJ,
1928        MatZeroEntries_MPIBAIJ,
1929 /*25*/ MatZeroRows_MPIBAIJ,
1930        0,
1931        0,
1932        0,
1933        0,
1934 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1935        0,
1936        0,
1937        0,
1938        0,
1939 /*35*/ MatDuplicate_MPIBAIJ,
1940        0,
1941        0,
1942        0,
1943        0,
1944 /*40*/ 0,
1945        MatGetSubMatrices_MPIBAIJ,
1946        MatIncreaseOverlap_MPIBAIJ,
1947        MatGetValues_MPIBAIJ,
1948        0,
1949 /*45*/ MatPrintHelp_MPIBAIJ,
1950        MatScale_MPIBAIJ,
1951        0,
1952        0,
1953        0,
1954 /*50*/ MatGetBlockSize_MPIBAIJ,
1955        0,
1956        0,
1957        0,
1958        0,
1959 /*55*/ 0,
1960        0,
1961        MatSetUnfactored_MPIBAIJ,
1962        0,
1963        MatSetValuesBlocked_MPIBAIJ,
1964 /*60*/ 0,
1965        MatDestroy_MPIBAIJ,
1966        MatView_MPIBAIJ,
1967        MatGetPetscMaps_Petsc,
1968        0,
1969 /*65*/ 0,
1970        0,
1971        0,
1972        0,
1973        0,
1974 /*70*/ MatGetRowMax_MPIBAIJ,
1975        0,
1976        0,
1977        0,
1978        0,
1979 /*75*/ 0,
1980        0,
1981        0,
1982        0,
1983        0,
1984 /*80*/ 0,
1985        0,
1986        0,
1987        0,
1988        MatLoad_MPIBAIJ,
1989 /*85*/ 0,
1990        0,
1991        0,
1992        0,
1993        0,
1994 /*90*/ 0,
1995        0,
1996        0,
1997        0,
1998        0,
1999 /*95*/ 0,
2000        0,
2001        0,
2002        0};
2003 
2004 
2005 EXTERN_C_BEGIN
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2008 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2009 {
2010   PetscFunctionBegin;
2011   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2012   *iscopy = PETSC_FALSE;
2013   PetscFunctionReturn(0);
2014 }
2015 EXTERN_C_END
2016 
2017 EXTERN_C_BEGIN
2018 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2019 EXTERN_C_END
2020 
2021 EXTERN_C_BEGIN
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2024 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2025 {
2026   Mat_MPIBAIJ  *b;
2027   PetscErrorCode ierr;
2028   int  i;
2029 
2030   PetscFunctionBegin;
2031   B->preallocated = PETSC_TRUE;
2032   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2033 
2034   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2035   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2036   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2037   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2038   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2039   if (d_nnz) {
2040   for (i=0; i<B->m/bs; i++) {
2041       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2042     }
2043   }
2044   if (o_nnz) {
2045     for (i=0; i<B->m/bs; i++) {
2046       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2047     }
2048   }
2049 
2050   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2051   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2052   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2053   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2054 
2055   b = (Mat_MPIBAIJ*)B->data;
2056   b->bs  = bs;
2057   b->bs2 = bs*bs;
2058   b->mbs = B->m/bs;
2059   b->nbs = B->n/bs;
2060   b->Mbs = B->M/bs;
2061   b->Nbs = B->N/bs;
2062 
2063   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2064   b->rowners[0]    = 0;
2065   for (i=2; i<=b->size; i++) {
2066     b->rowners[i] += b->rowners[i-1];
2067   }
2068   b->rstart    = b->rowners[b->rank];
2069   b->rend      = b->rowners[b->rank+1];
2070 
2071   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2072   b->cowners[0] = 0;
2073   for (i=2; i<=b->size; i++) {
2074     b->cowners[i] += b->cowners[i-1];
2075   }
2076   b->cstart    = b->cowners[b->rank];
2077   b->cend      = b->cowners[b->rank+1];
2078 
2079   for (i=0; i<=b->size; i++) {
2080     b->rowners_bs[i] = b->rowners[i]*bs;
2081   }
2082   b->rstart_bs = b->rstart*bs;
2083   b->rend_bs   = b->rend*bs;
2084   b->cstart_bs = b->cstart*bs;
2085   b->cend_bs   = b->cend*bs;
2086 
2087   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2088   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2089   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2090   PetscLogObjectParent(B,b->A);
2091   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2092   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2093   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2094   PetscLogObjectParent(B,b->B);
2095 
2096   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2097 
2098   PetscFunctionReturn(0);
2099 }
2100 EXTERN_C_END
2101 
2102 EXTERN_C_BEGIN
2103 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2104 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2105 EXTERN_C_END
2106 
2107 /*MC
2108    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2109 
2110    Options Database Keys:
2111 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2112 
2113   Level: beginner
2114 
2115 .seealso: MatCreateMPIBAIJ
2116 M*/
2117 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Constructor registered for the MATMPIBAIJ type.
   Allocates and zero-initializes the Mat_MPIBAIJ context, installs the
   function table, sets up the ownership arrays and stash, and composes the
   type-specific functions (store/retrieve values, preallocation, diagonal
   block access, hash-table factor).  Preallocation itself happens later in
   MatMPIBAIJSetPreallocation_MPIBAIJ().
*/
PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ  *b;
  PetscErrorCode ierr;
  PetscTruth   flg;

  PetscFunctionBegin;
  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  /* install the MPIBAIJ function table defined above */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation holds rowners, cowners and rowners_bs back to back */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* optionally enable hash-table assembly; factors <= 1.0 fall back to 1.39 */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* compose the type-specific entry points looked up by the generic wrappers */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2211 
2212 /*MC
2213    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2214 
2215    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2216    and MATMPIBAIJ otherwise.
2217 
2218    Options Database Keys:
2219 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2220 
2221   Level: beginner
2222 
2223 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2224 M*/
2225 
2226 EXTERN_C_BEGIN
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatCreate_BAIJ"
2229 PetscErrorCode MatCreate_BAIJ(Mat A)
2230 {
2231   PetscErrorCode ierr;
2232   int size;
2233 
2234   PetscFunctionBegin;
2235   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2236   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2237   if (size == 1) {
2238     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2239   } else {
2240     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2241   }
2242   PetscFunctionReturn(0);
2243 }
2244 EXTERN_C_END
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2248 /*@C
2249    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2250    (block compressed row).  For good matrix assembly performance
2251    the user should preallocate the matrix storage by setting the parameters
2252    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2253    performance can be increased by more than a factor of 50.
2254 
2255    Collective on Mat
2256 
2257    Input Parameters:
2258 +  A - the matrix
.  bs   - size of block
2260 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2261            submatrix  (same for all local rows)
2262 .  d_nnz - array containing the number of block nonzeros in the various block rows
2263            of the in diagonal portion of the local (possibly different for each block
2264            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2265 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2266            submatrix (same for all local rows).
2267 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2268            off-diagonal portion of the local submatrix (possibly different for
2269            each block row) or PETSC_NULL.
2270 
2271    Output Parameter:
2272 
2273 
2274    Options Database Keys:
2275 .   -mat_no_unroll - uses code that does not unroll the loops in the
2276                      block calculations (much slower)
2277 .   -mat_block_size - size of the blocks to use
2278 
2279    Notes:
2280    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2282 
2283    Storage Information:
2284    For a square global matrix we define each processor's diagonal portion
2285    to be its local rows and the corresponding columns (a square submatrix);
2286    each processor's off-diagonal portion encompasses the remainder of the
2287    local matrix (a rectangular submatrix).
2288 
2289    The user can specify preallocated storage for the diagonal part of
2290    the local submatrix with either d_nz or d_nnz (not both).  Set
2291    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2292    memory allocation.  Likewise, specify preallocated storage for the
2293    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2294 
2295    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2296    the figure below we depict these three local rows and all columns (0-11).
2297 
2298 .vb
2299            0 1 2 3 4 5 6 7 8 9 10 11
2300           -------------------
2301    row 3  |  o o o d d d o o o o o o
2302    row 4  |  o o o d d d o o o o o o
2303    row 5  |  o o o d d d o o o o o o
2304           -------------------
2305 .ve
2306 
2307    Thus, any entries in the d locations are stored in the d (diagonal)
2308    submatrix, and any entries in the o locations are stored in the
2309    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2310    stored simply in the MATSEQBAIJ format for compressed row storage.
2311 
2312    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2313    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2314    In general, for PDE problems in which most nonzeros are near the diagonal,
2315    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2316    or you will get TERRIBLE performance; see the users' manual chapter on
2317    matrices.
2318 
2319    Level: intermediate
2320 
2321 .keywords: matrix, block, aij, compressed row, sparse, parallel
2322 
2323 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2324 @*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed on the matrix;
     silently a no-op for matrix types that did not compose this function */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatCreateMPIBAIJ"
2339 /*@C
2340    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2341    (block compressed row).  For good matrix assembly performance
2342    the user should preallocate the matrix storage by setting the parameters
2343    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2344    performance can be increased by more than a factor of 50.
2345 
2346    Collective on MPI_Comm
2347 
2348    Input Parameters:
2349 +  comm - MPI communicator
.  bs   - size of block
2351 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2352            This value should be the same as the local size used in creating the
2353            y vector for the matrix-vector product y = Ax.
2354 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2355            This value should be the same as the local size used in creating the
2356            x vector for the matrix-vector product y = Ax.
2357 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2358 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2359 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2360            submatrix  (same for all local rows)
2361 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2362            of the in diagonal portion of the local (possibly different for each block
2363            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2364 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2365            submatrix (same for all local rows).
2366 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2367            off-diagonal portion of the local submatrix (possibly different for
2368            each block row) or PETSC_NULL.
2369 
2370    Output Parameter:
2371 .  A - the matrix
2372 
2373    Options Database Keys:
2374 .   -mat_no_unroll - uses code that does not unroll the loops in the
2375                      block calculations (much slower)
2376 .   -mat_block_size - size of the blocks to use
2377 
2378    Notes:
   A nonzero block is any block that has 1 or more nonzeros in it
2380 
2381    The user MUST specify either the local or global matrix dimensions
2382    (possibly both).
2383 
2384    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2386 
2387    Storage Information:
2388    For a square global matrix we define each processor's diagonal portion
2389    to be its local rows and the corresponding columns (a square submatrix);
2390    each processor's off-diagonal portion encompasses the remainder of the
2391    local matrix (a rectangular submatrix).
2392 
2393    The user can specify preallocated storage for the diagonal part of
2394    the local submatrix with either d_nz or d_nnz (not both).  Set
2395    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2396    memory allocation.  Likewise, specify preallocated storage for the
2397    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2398 
2399    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2400    the figure below we depict these three local rows and all columns (0-11).
2401 
2402 .vb
2403            0 1 2 3 4 5 6 7 8 9 10 11
2404           -------------------
2405    row 3  |  o o o d d d o o o o o o
2406    row 4  |  o o o d d d o o o o o o
2407    row 5  |  o o o d d d o o o o o o
2408           -------------------
2409 .ve
2410 
2411    Thus, any entries in the d locations are stored in the d (diagonal)
2412    submatrix, and any entries in the o locations are stored in the
2413    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2414    stored simply in the MATSEQBAIJ format for compressed row storage.
2415 
2416    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2417    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2418    In general, for PDE problems in which most nonzeros are near the diagonal,
2419    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2420    or you will get TERRIBLE performance; see the users' manual chapter on
2421    matrices.
2422 
2423    Level: intermediate
2424 
2425 .keywords: matrix, block, aij, compressed row, sparse, parallel
2426 
2427 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2428 @*/
2429 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2430 {
2431   PetscErrorCode ierr;
2432   int size;
2433 
2434   PetscFunctionBegin;
2435   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2436   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2437   if (size > 1) {
2438     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2439     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2440   } else {
2441     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2442     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2443   }
2444   PetscFunctionReturn(0);
2445 }
2446 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates a parallel MPIBAIJ matrix.

   Creates a new matrix of the same type and sizes as matin, copies all
   the layout/size bookkeeping fields from the old Mat_MPIBAIJ structure,
   (re)creates the stashes, copies the column map and global-column array
   if they exist, duplicates the communication vector/scatter, and finally
   duplicates the two sequential pieces A (diagonal block) and B
   (off-diagonal block), copying values or not according to cpvalues.

   Input Parameters:
.  matin    - matrix to duplicate
.  cpvalues - MAT_COPY_VALUES or MAT_DO_NOT_COPY_VALUES (forwarded to the
              MatDuplicate() calls on the sequential pieces)

   Output Parameter:
.  newmat   - the duplicate; set to 0 first so it is clean on early error
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  int len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  /* create an empty matrix of identical global/local dimensions and type */
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  /* copy the whole operations table so any overridden methods carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  /* the duplicate starts out preallocated and assembled */
  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size and block-count bookkeeping */
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* copy ownership ranges (in block units) and communicator info */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch space is not copied; it is rebuilt lazily */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  /* ownership ranges in scalar (point) units */
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff: only the configuration flags are copied, the table
     itself is rebuilt on demand */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* rowners holds three size+2 arrays packed together (see creation code) */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  /* fresh stashes: point stash (block size 1) and block stash */
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  /* copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* copy the global column indices of the off-diagonal part (garray);
     its length is the number of block columns of B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  /* duplicate the local work vector and the scatter used by MatMult() */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);

  /* duplicate the two sequential pieces, honoring cpvalues */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
2534 
2535 #include "petscsys.h"
2536 
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatLoad_MPIBAIJ"
2539 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2540 {
2541   Mat          A;
2542   PetscErrorCode ierr;
2543   int          i,nz,j,rstart,rend,fd;
2544   PetscScalar  *vals,*buf;
2545   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2546   MPI_Status   status;
2547   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2548   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2549   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2550   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2551   int          dcount,kmax,k,nzcount,tmp;
2552 
2553   PetscFunctionBegin;
2554   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2555 
2556   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2557   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2558   if (!rank) {
2559     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2560     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2561     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2562     if (header[3] < 0) {
2563       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2564     }
2565   }
2566 
2567   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2568   M = header[1]; N = header[2];
2569 
2570   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2571 
2572   /*
2573      This code adds extra rows to make sure the number of rows is
2574      divisible by the blocksize
2575   */
2576   Mbs        = M/bs;
2577   extra_rows = bs - M + bs*(Mbs);
2578   if (extra_rows == bs) extra_rows = 0;
2579   else                  Mbs++;
2580   if (extra_rows &&!rank) {
2581     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2582   }
2583 
2584   /* determine ownership of all rows */
2585   mbs        = Mbs/size + ((Mbs % size) > rank);
2586   m          = mbs*bs;
2587   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2588   browners   = rowners + size + 1;
2589   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2590   rowners[0] = 0;
2591   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2592   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2593   rstart = rowners[rank];
2594   rend   = rowners[rank+1];
2595 
2596   /* distribute row lengths to all processors */
2597   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2598   if (!rank) {
2599     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2600     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2601     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2602     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2603     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2604     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2605     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2606   } else {
2607     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2608   }
2609 
2610   if (!rank) {
2611     /* calculate the number of nonzeros on each processor */
2612     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2613     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2614     for (i=0; i<size; i++) {
2615       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2616         procsnz[i] += rowlengths[j];
2617       }
2618     }
2619     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2620 
2621     /* determine max buffer needed and allocate it */
2622     maxnz = 0;
2623     for (i=0; i<size; i++) {
2624       maxnz = PetscMax(maxnz,procsnz[i]);
2625     }
2626     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2627 
2628     /* read in my part of the matrix column indices  */
2629     nz     = procsnz[0];
2630     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2631     mycols = ibuf;
2632     if (size == 1)  nz -= extra_rows;
2633     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2634     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2635 
2636     /* read in every ones (except the last) and ship off */
2637     for (i=1; i<size-1; i++) {
2638       nz   = procsnz[i];
2639       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2640       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2641     }
2642     /* read in the stuff for the last proc */
2643     if (size != 1) {
2644       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2645       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2646       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2647       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2648     }
2649     ierr = PetscFree(cols);CHKERRQ(ierr);
2650   } else {
2651     /* determine buffer space needed for message */
2652     nz = 0;
2653     for (i=0; i<m; i++) {
2654       nz += locrowlens[i];
2655     }
2656     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2657     mycols = ibuf;
2658     /* receive message of column indices*/
2659     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2660     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2661     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2662   }
2663 
2664   /* loop over local rows, determining number of off diagonal entries */
2665   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2666   odlens   = dlens + (rend-rstart);
2667   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2668   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2669   masked1  = mask    + Mbs;
2670   masked2  = masked1 + Mbs;
2671   rowcount = 0; nzcount = 0;
2672   for (i=0; i<mbs; i++) {
2673     dcount  = 0;
2674     odcount = 0;
2675     for (j=0; j<bs; j++) {
2676       kmax = locrowlens[rowcount];
2677       for (k=0; k<kmax; k++) {
2678         tmp = mycols[nzcount++]/bs;
2679         if (!mask[tmp]) {
2680           mask[tmp] = 1;
2681           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2682           else masked1[dcount++] = tmp;
2683         }
2684       }
2685       rowcount++;
2686     }
2687 
2688     dlens[i]  = dcount;
2689     odlens[i] = odcount;
2690 
2691     /* zero out the mask elements we set */
2692     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2693     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2694   }
2695 
2696   /* create our matrix */
2697   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2698   ierr = MatSetType(A,type);CHKERRQ(ierr)
2699   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2700 
2701   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2702   MatSetOption(A,MAT_COLUMNS_SORTED);
2703 
2704   if (!rank) {
2705     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2706     /* read in my part of the matrix numerical values  */
2707     nz = procsnz[0];
2708     vals = buf;
2709     mycols = ibuf;
2710     if (size == 1)  nz -= extra_rows;
2711     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2712     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2713 
2714     /* insert into matrix */
2715     jj      = rstart*bs;
2716     for (i=0; i<m; i++) {
2717       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2718       mycols += locrowlens[i];
2719       vals   += locrowlens[i];
2720       jj++;
2721     }
2722     /* read in other processors (except the last one) and ship out */
2723     for (i=1; i<size-1; i++) {
2724       nz   = procsnz[i];
2725       vals = buf;
2726       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2727       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2728     }
2729     /* the last proc */
2730     if (size != 1){
2731       nz   = procsnz[i] - extra_rows;
2732       vals = buf;
2733       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2734       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2735       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2736     }
2737     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2738   } else {
2739     /* receive numeric values */
2740     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2741 
2742     /* receive message of values*/
2743     vals   = buf;
2744     mycols = ibuf;
2745     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2746     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2747     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2748 
2749     /* insert into matrix */
2750     jj      = rstart*bs;
2751     for (i=0; i<m; i++) {
2752       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2753       mycols += locrowlens[i];
2754       vals   += locrowlens[i];
2755       jj++;
2756     }
2757   }
2758   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2759   ierr = PetscFree(buf);CHKERRQ(ierr);
2760   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2761   ierr = PetscFree(rowners);CHKERRQ(ierr);
2762   ierr = PetscFree(dlens);CHKERRQ(ierr);
2763   ierr = PetscFree(mask);CHKERRQ(ierr);
2764   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2765   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2766 
2767   *newmat = A;
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 #undef __FUNCT__
2772 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2773 /*@
2774    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2775 
2776    Input Parameters:
2777 .  mat  - the matrix
2778 .  fact - factor
2779 
2780    Collective on Mat
2781 
2782    Level: advanced
2783 
2784   Notes:
2785    This can also be set by the command line option: -mat_use_hash_table fact
2786 
2787 .keywords: matrix, hashtable, factor, HT
2788 
2789 .seealso: MatSetOption()
2790 @*/
2791 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2792 {
2793   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2794 
2795   PetscFunctionBegin;
2796   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2797   if (f) {
2798     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2799   }
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2805 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2806 {
2807   Mat_MPIBAIJ *baij;
2808 
2809   PetscFunctionBegin;
2810   baij = (Mat_MPIBAIJ*)mat->data;
2811   baij->ht_fact = fact;
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2817 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2818 {
2819   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2820   PetscFunctionBegin;
2821   *Ad     = a->A;
2822   *Ao     = a->B;
2823   *colmap = a->garray;
2824   PetscFunctionReturn(0);
2825 }
2826