xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN int DisAssemble_MPIBAIJ(Mat);
6 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
7 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
9 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
10 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
11 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
12 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the functions ..._MatScalar() do not exist as
   separate symbols; they are #defined below to the plain-named routines. Otherwise
   (PETSC_USE_MAT_SINGLE) the ..._MatScalar() routines take matrix elements already
   in single precision (MatScalar) and insert them into the single-precision data
   structures, while MatSetValues_MPIBAIJ()/MatSetValuesBlocked_MPIBAIJ() first
   convert the incoming PetscScalar entries to single precision and then call the
   corresponding ..._MatScalar() routine.
*/
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
25 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
26 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
42   int          ierr,i;
43   PetscScalar  *va,*vb;
44   Vec          vtmp;
45 
46   PetscFunctionBegin;
47 
48   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
49   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
50 
51   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
52   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
53   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
54 
55   for (i=0; i<A->m; i++){
56     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
57   }
58 
59   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
60   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
61   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
62 
63   PetscFunctionReturn(0);
64 }
65 
66 EXTERN_C_BEGIN
67 #undef __FUNCT__
68 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
69 int MatStoreValues_MPIBAIJ(Mat mat)
70 {
71   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
72   int         ierr;
73 
74   PetscFunctionBegin;
75   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
76   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
77   PetscFunctionReturn(0);
78 }
79 EXTERN_C_END
80 
81 EXTERN_C_BEGIN
82 #undef __FUNCT__
83 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
84 int MatRetrieveValues_MPIBAIJ(Mat mat)
85 {
86   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
87   int         ierr;
88 
89   PetscFunctionBegin;
90   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
91   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 EXTERN_C_END
95 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
/*
   CreateColmap_MPIBAIJ_Private - Builds baij->colmap, mapping a global
   block-column number to 1 + (first local point-column index of that block
   inside the off-diagonal part B).  Entries are stored shifted by +1 so that
   0 (or a failed table lookup) means "block column not present locally".
*/
int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;  /* nbs = # of block columns present in B */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* sparse table: key = global block column + 1, value = local point column + 1 */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense array of length Nbs (global # of block columns) -- not scalable in memory */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
124 
/* number of extra blocks allocated each time a row runs out of space */
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private - Inline insertion of one point value
   (row,col given in point indices) into the diagonal sub-matrix A.
   Searches the block row (binary narrowing followed by a linear scan),
   enlarges the row storage by CHUNKSIZE blocks when full, and shifts later
   entries to open a slot for a new block.  Relies on the local variables
   declared by the caller: rp, ap, rmax, nrow, low, high, t, _i, bap, brow,
   bcol, ridx, cidx, ii, N and the A/a/aimax/ai/ailen/aj/aa aliases.

   Bug fix: the PetscMemzero of the newly allocated gap previously used
   sizeof(PetscScalar); the buffer is laid out in MatScalar units, so with
   PETSC_USE_MAT_SINGLE (MatScalar narrower than PetscScalar) the zeroing
   overran into the adjacent new_j index array.  Now uses sizeof(MatScalar),
   matching MatSetValues_SeqBAIJ_B_Private().
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
203 
/*
   MatSetValues_SeqBAIJ_B_Private - Inline insertion of one point value
   (row,col given in point indices) into the off-diagonal sub-matrix B.
   Mirror image of MatSetValues_SeqBAIJ_A_Private: searches the block row,
   enlarges the row storage by CHUNKSIZE blocks when full, and shifts later
   entries to open a slot for a new block.  Relies on the local variables
   declared by the caller: rp, ap, rmax, nrow, low, high, t, _i, bap, brow,
   bcol, ridx, cidx, ii, N and the B/b/bimax/bi/bilen/bj/ba aliases.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
279 
280 #if defined(PETSC_USE_MAT_SINGLE)
281 #undef __FUNCT__
282 #define __FUNCT__ "MatSetValues_MPIBAIJ"
283 int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
284 {
285   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
286   int         ierr,i,N = m*n;
287   MatScalar   *vsingle;
288 
289   PetscFunctionBegin;
290   if (N > b->setvalueslen) {
291     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
292     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
293     b->setvalueslen  = N;
294   }
295   vsingle = b->setvaluescopy;
296 
297   for (i=0; i<N; i++) {
298     vsingle[i] = v[i];
299   }
300   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
301   PetscFunctionReturn(0);
302 }
303 
304 #undef __FUNCT__
305 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
306 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
307 {
308   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
309   int         ierr,i,N = m*n*b->bs2;
310   MatScalar   *vsingle;
311 
312   PetscFunctionBegin;
313   if (N > b->setvalueslen) {
314     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
315     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
316     b->setvalueslen  = N;
317   }
318   vsingle = b->setvaluescopy;
319   for (i=0; i<N; i++) {
320     vsingle[i] = v[i];
321   }
322   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
323   PetscFunctionReturn(0);
324 }
325 
326 #undef __FUNCT__
327 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
328 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
329 {
330   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
331   int         ierr,i,N = m*n;
332   MatScalar   *vsingle;
333 
334   PetscFunctionBegin;
335   if (N > b->setvalueslen) {
336     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
337     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
338     b->setvalueslen  = N;
339   }
340   vsingle = b->setvaluescopy;
341   for (i=0; i<N; i++) {
342     vsingle[i] = v[i];
343   }
344   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
345   PetscFunctionReturn(0);
346 }
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
350 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
351 {
352   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
353   int         ierr,i,N = m*n*b->bs2;
354   MatScalar   *vsingle;
355 
356   PetscFunctionBegin;
357   if (N > b->setvalueslen) {
358     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
359     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
360     b->setvalueslen  = N;
361   }
362   vsingle = b->setvaluescopy;
363   for (i=0; i<N; i++) {
364     vsingle[i] = v[i];
365   }
366   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 #endif
370 
371 #undef __FUNCT__
372 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
373 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
374 {
375   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
376   MatScalar   value;
377   PetscTruth  roworiented = baij->roworiented;
378   int         ierr,i,j,row,col;
379   int         rstart_orig=baij->rstart_bs;
380   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
381   int         cend_orig=baij->cend_bs,bs=baij->bs;
382 
383   /* Some Variables required in the macro */
384   Mat         A = baij->A;
385   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
386   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
387   MatScalar   *aa=a->a;
388 
389   Mat         B = baij->B;
390   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
391   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
392   MatScalar   *ba=b->a;
393 
394   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
395   int         low,high,t,ridx,cidx,bs2=a->bs2;
396   MatScalar   *ap,*bap;
397 
398   PetscFunctionBegin;
399   for (i=0; i<m; i++) {
400     if (im[i] < 0) continue;
401 #if defined(PETSC_USE_BOPT_g)
402     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
403 #endif
404     if (im[i] >= rstart_orig && im[i] < rend_orig) {
405       row = im[i] - rstart_orig;
406       for (j=0; j<n; j++) {
407         if (in[j] >= cstart_orig && in[j] < cend_orig){
408           col = in[j] - cstart_orig;
409           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
411           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
412         } else if (in[j] < 0) continue;
413 #if defined(PETSC_USE_BOPT_g)
414         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
415 #endif
416         else {
417           if (mat->was_assembled) {
418             if (!baij->colmap) {
419               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
420             }
421 #if defined (PETSC_USE_CTABLE)
422             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
423             col  = col - 1;
424 #else
425             col = baij->colmap[in[j]/bs] - 1;
426 #endif
427             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
428               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
429               col =  in[j];
430               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
431               B = baij->B;
432               b = (Mat_SeqBAIJ*)(B)->data;
433               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
434               ba=b->a;
435             } else col += in[j]%bs;
436           } else col = in[j];
437           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
438           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
439           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
440         }
441       }
442     } else {
443       if (!baij->donotstash) {
444         if (roworiented) {
445           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
446         } else {
447           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
448         }
449       }
450     }
451   }
452   PetscFunctionReturn(0);
453 }
454 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - Inserts/adds a logically dense
   m-by-n array of bs-by-bs blocks (values in MatScalar precision) into the
   parallel BAIJ matrix.  Locally owned block rows are dispatched to the
   sequential A (diagonal) or B (off-diagonal) sub-matrix; block rows owned
   by other processes are stashed for communication at assembly time.
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;     /* bs2-sized scratch block, reused across calls */
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block scratch buffer */
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stepval = distance (in scalars) from the end of one block's row to the
     start of the same row in the next block of v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;              /* negative block rows are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;               /* local block row */
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided block into contiguous column-major storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;                   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column lies in the diagonal part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;     /* negative block columns are ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          /* off-diagonal part: translate the global block column through colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* sanity check: colmap entries must be block-aligned (+1 shifted) */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-process block column: revert to global indexing */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for MatAssemblyBegin/End */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
556 
/* multiplicative hashing (golden-ratio constant): map an integer key into
   [0,size) via the fractional part of key*HASH_KEY; tmp is a caller-supplied
   PetscReal scratch variable used to hold the product */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - Point-index insertion path used when
   the matrix carries a hash table (baij->ht keys, baij->hd block addresses)
   from block (row,col) to block storage.  Only blocks already present in
   the table can be written; a missing key is an error.  Off-process rows
   are stashed for assembly-time communication.
*/
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;   /* ownership range in point indices */
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;                           /* scratch for the HASH macro */
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics */
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the global block (row,col) */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* same linear probing as below, but also counts probes */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            /* wrap around to the start of the table */
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* linear probing from h1, wrapping once; absence of the key is an error */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for MatAssemblyBegin/End */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
638 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - Blocked insertion path using
   the hash table (baij->ht keys, baij->hd block addresses); im[]/in[] are
   block indices.  Only blocks already present in the table can be written;
   a missing key is an error.  Off-process block rows are stashed.
*/
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col;
  int             rstart=baij->rstart ;     /* ownership range in block indices */
  int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;                      /* scratch for the HASH macro */
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics */
#endif

  PetscFunctionBegin;

  /* stepval = distance (in scalars) between consecutive rows of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    /* NOTE(review): with row-oriented input and n>1 this starting offset
       (i*bs2) differs from the i*(stepval+bs)*bs form used in the
       commented-out code below and in the non-HT blocked routine -- confirm
       the intended layout of v for the m>1, n>1 case */
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table: key encodes the global block (row,col) */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* same linear probing as below, but also counts probes */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            /* wrap around to the start of the table */
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* linear probing from h1, wrapping once; absence of the key is an error */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];   /* address of the stored bs2 block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* scatter the row-major input block into column-major block storage */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          /* input already column-major: copy row-of-block by row-of-block */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned by another process: stash for MatAssemblyBegin/End */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
754 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves an m-by-n logically dense array of point
   values from the locally owned part of the matrix into v (row-major).
   Rows owned by other processes are not supported.  Columns absent from the
   local off-diagonal part yield 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;  /* ownership range in point indices */
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;   /* local point row */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate the global column through colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;   /* undo the +1 shift; a miss yields data = -1 */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not present locally -> entry is an (unstored) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;   /* local point column inside B */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
798 
799 #undef __FUNCT__
800 #define __FUNCT__ "MatNorm_MPIBAIJ"
801 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
802 {
803   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
804   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
805   int        ierr,i,bs2=baij->bs2;
806   PetscReal  sum = 0.0;
807   MatScalar  *v;
808 
809   PetscFunctionBegin;
810   if (baij->size == 1) {
811     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
812   } else {
813     if (type == NORM_FROBENIUS) {
814       v = amat->a;
815       for (i=0; i<amat->nz*bs2; i++) {
816 #if defined(PETSC_USE_COMPLEX)
817         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
818 #else
819         sum += (*v)*(*v); v++;
820 #endif
821       }
822       v = bmat->a;
823       for (i=0; i<bmat->nz*bs2; i++) {
824 #if defined(PETSC_USE_COMPLEX)
825         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
826 #else
827         sum += (*v)*(*v); v++;
828 #endif
829       }
830       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
831       *nrm = sqrt(*nrm);
832     } else {
833       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
834     }
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 
840 /*
841   Creates the hash table, and sets the table
842   This table is created only once.
  If new entries need to be added to the matrix
844   then the hash table has to be destroyed and
845   recreated.
846 */
847 #undef __FUNCT__
848 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
849 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
850 {
851   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
852   Mat         A = baij->A,B=baij->B;
853   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
854   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
855   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
856   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
857   int         *HT,key;
858   MatScalar   **HD;
859   PetscReal   tmp;
860 #if defined(PETSC_USE_BOPT_g)
861   int         ct=0,max=0;
862 #endif
863 
864   PetscFunctionBegin;
865   baij->ht_size=(int)(factor*nz);
866   size = baij->ht_size;
867 
868   if (baij->ht) {
869     PetscFunctionReturn(0);
870   }
871 
872   /* Allocate Memory for Hash Table */
873   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
874   baij->ht = (int*)(baij->hd + size);
875   HD       = baij->hd;
876   HT       = baij->ht;
877 
878 
879   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
880 
881 
882   /* Loop Over A */
883   for (i=0; i<a->mbs; i++) {
884     for (j=ai[i]; j<ai[i+1]; j++) {
885       row = i+rstart;
886       col = aj[j]+cstart;
887 
888       key = row*Nbs + col + 1;
889       h1  = HASH(size,key,tmp);
890       for (k=0; k<size; k++){
891         if (!HT[(h1+k)%size]) {
892           HT[(h1+k)%size] = key;
893           HD[(h1+k)%size] = a->a + j*bs2;
894           break;
895 #if defined(PETSC_USE_BOPT_g)
896         } else {
897           ct++;
898 #endif
899         }
900       }
901 #if defined(PETSC_USE_BOPT_g)
902       if (k> max) max = k;
903 #endif
904     }
905   }
906   /* Loop Over B */
907   for (i=0; i<b->mbs; i++) {
908     for (j=bi[i]; j<bi[i+1]; j++) {
909       row = i+rstart;
910       col = garray[bj[j]];
911       key = row*Nbs + col + 1;
912       h1  = HASH(size,key,tmp);
913       for (k=0; k<size; k++){
914         if (!HT[(h1+k)%size]) {
915           HT[(h1+k)%size] = key;
916           HD[(h1+k)%size] = b->a + j*bs2;
917           break;
918 #if defined(PETSC_USE_BOPT_g)
919         } else {
920           ct++;
921 #endif
922         }
923       }
924 #if defined(PETSC_USE_BOPT_g)
925       if (k> max) max = k;
926 #endif
927     }
928   }
929 
930   /* Print Summary */
931 #if defined(PETSC_USE_BOPT_g)
932   for (i=0,j=0; i<size; i++) {
933     if (HT[i]) {j++;}
934   }
935   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
936 #endif
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNCT__
941 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
942 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
943 {
944   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
945   int         ierr,nstash,reallocs;
946   InsertMode  addv;
947 
948   PetscFunctionBegin;
949   if (baij->donotstash) {
950     PetscFunctionReturn(0);
951   }
952 
953   /* make sure all processors are either in INSERTMODE or ADDMODE */
954   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
955   if (addv == (ADD_VALUES|INSERT_VALUES)) {
956     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
957   }
958   mat->insertmode = addv; /* in case this processor had no cache */
959 
960   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
961   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
962   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
963   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
964   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
965   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
966   PetscFunctionReturn(0);
967 }
968 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes assembly: drains both stashes of entries sent by other
   processes, assembles the local diagonal (A) and off-diagonal (B) parts,
   coordinates disassembly when any process changed B's nonzero structure,
   builds the multiply scatter on the first final assembly, and optionally
   installs the hash-table MatSetValues variants.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each message holds (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* block values: each of the ncols entries occupies bs2 scalars */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the ghost-column scatter used by MatMult */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* switch to the hash-table based MatSetValues variants from now on */
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
1067 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Views the parallel matrix.  For ASCII info formats each process prints
   its local statistics; otherwise the entire matrix is gathered onto the
   first process (via a temporary MPIBAIJ matrix with all rows on rank 0)
   and viewed there through a singleton viewer.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    /* the info formats print per-process statistics and return early */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* nothing to do if the draw device is a null window */
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs*bs block at a time */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's local block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatView_MPIBAIJ"
1184 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1185 {
1186   int        ierr;
1187   PetscTruth iascii,isdraw,issocket,isbinary;
1188 
1189   PetscFunctionBegin;
1190   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1191   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1193   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1194   if (iascii || isdraw || issocket || isbinary) {
1195     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1196   } else {
1197     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1198   }
1199   PetscFunctionReturn(0);
1200 }
1201 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   Frees everything owned by the parallel BAIJ matrix: both stashes, the
   ownership-range array, the local diagonal (A) and off-diagonal (B) parts,
   the column map, the ghost vector and its scatter, and the optional
   row-work, block-work and hash-table storage.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* freeing hd also frees ht: they share a single allocation */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatMult_MPIBAIJ"
1238 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1239 {
1240   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1241   int         ierr,nt;
1242 
1243   PetscFunctionBegin;
1244   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1245   if (nt != A->n) {
1246     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1247   }
1248   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1249   if (nt != A->m) {
1250     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1251   }
1252   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1253   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1254   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1255   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1256   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
/*
   zz = yy + A*xx.  Overlaps the scatter of the off-process entries of xx
   with the local diagonal-block multiply-add, then folds in the
   off-diagonal contribution.
*/
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* begin communicating the ghost values of xx into a->lvec */
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* zz = yy + (diagonal block)*xx, overlapped with the scatter */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* zz += (off-diagonal block)*lvec */
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1274 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
/*
   yy = A^T * xx.  The off-diagonal part's transpose is applied first and its
   result scattered (with ADD_VALUES) back to the owning processes while the
   local diagonal-block transpose multiply proceeds.
*/
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1295 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
/*
   zz = yy + A^T * xx.  Same communication pattern as
   MatMultTranspose_MPIBAIJ: the off-diagonal transpose result is scattered
   (added) into zz while the local transpose multiply-add proceeds.
*/
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1316 
1317 /*
1318   This only works correctly for square matrices where the subblock A->A is the
1319    diagonal block
1320 */
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1323 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1324 {
1325   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1326   int         ierr;
1327 
1328   PetscFunctionBegin;
1329   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1330   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatScale_MPIBAIJ"
1336 int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1337 {
1338   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1339   int         ierr;
1340 
1341   PetscFunctionBegin;
1342   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1343   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   Returns one locally-owned (global point) row of the matrix as global
   column indices and values, merging the rows of the diagonal (A) and
   off-diagonal (B) parts in increasing column order.  Work arrays sized
   for the longest local row are allocated on first use and reused until
   MatAssemblyEnd frees them.  Must be paired with MatRestoreRow_MPIBAIJ.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices appended after them */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  /* note: SETERRQ expands to a braced block, hence no trailing semicolon */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* only request the pieces the caller actually wants from A and B */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global block column < cstart come first ... */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then all of A, then the remaining (right-of-diagonal) B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so the split point must be found here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1430 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1431 {
1432   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1433 
1434   PetscFunctionBegin;
1435   if (baij->getrowactive == PETSC_FALSE) {
1436     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1437   }
1438   baij->getrowactive = PETSC_FALSE;
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1444 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1445 {
1446   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1447 
1448   PetscFunctionBegin;
1449   *bs = baij->bs;
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1455 int MatZeroEntries_MPIBAIJ(Mat A)
1456 {
1457   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1458   int         ierr;
1459 
1460   PetscFunctionBegin;
1461   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1462   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1468 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1469 {
1470   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1471   Mat         A = a->A,B = a->B;
1472   int         ierr;
1473   PetscReal   isend[5],irecv[5];
1474 
1475   PetscFunctionBegin;
1476   info->block_size     = (PetscReal)a->bs;
1477   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1478   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1479   isend[3] = info->memory;  isend[4] = info->mallocs;
1480   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1481   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1482   isend[3] += info->memory;  isend[4] += info->mallocs;
1483   if (flag == MAT_LOCAL) {
1484     info->nz_used      = isend[0];
1485     info->nz_allocated = isend[1];
1486     info->nz_unneeded  = isend[2];
1487     info->memory       = isend[3];
1488     info->mallocs      = isend[4];
1489   } else if (flag == MAT_GLOBAL_MAX) {
1490     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1491     info->nz_used      = irecv[0];
1492     info->nz_allocated = irecv[1];
1493     info->nz_unneeded  = irecv[2];
1494     info->memory       = irecv[3];
1495     info->mallocs      = irecv[4];
1496   } else if (flag == MAT_GLOBAL_SUM) {
1497     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1498     info->nz_used      = irecv[0];
1499     info->nz_allocated = irecv[1];
1500     info->nz_unneeded  = irecv[2];
1501     info->memory       = irecv[3];
1502     info->mallocs      = irecv[4];
1503   } else {
1504     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1505   }
1506   info->rows_global       = (PetscReal)A->M;
1507   info->columns_global    = (PetscReal)A->N;
1508   info->rows_local        = (PetscReal)A->m;
1509   info->columns_local     = (PetscReal)A->N;
1510   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1511   info->fill_ratio_needed = 0;
1512   info->factor_mallocs    = 0;
1513   PetscFunctionReturn(0);
1514 }
1515 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
/*
   Sets an option on the parallel matrix, forwarding it to the local A and B
   parts where appropriate and updating the parallel-level flags
   (roworiented, donotstash, ht_flag) where the option affects them.
*/
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded unchanged to both local parts */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* also tracked at the parallel level for the stash code */
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* suppresses stashing entirely; off-process entries will be dropped */
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    /* hash table is built lazily at the next final assembly */
    a->ht_flag = PETSC_TRUE;
    break;
  /* symmetry flags are recorded only on the diagonal part */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1577 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1578 {
1579   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1580   Mat_SeqBAIJ *Aloc;
1581   Mat         B;
1582   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1583   int         bs=baij->bs,mbs=baij->mbs;
1584   MatScalar   *a;
1585 
1586   PetscFunctionBegin;
1587   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1588   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1589   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1590   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1591 
1592   /* copy over the A part */
1593   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1594   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1595   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1596 
1597   for (i=0; i<mbs; i++) {
1598     rvals[0] = bs*(baij->rstart + i);
1599     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1600     for (j=ai[i]; j<ai[i+1]; j++) {
1601       col = (baij->cstart+aj[j])*bs;
1602       for (k=0; k<bs; k++) {
1603         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1604         col++; a += bs;
1605       }
1606     }
1607   }
1608   /* copy over the B part */
1609   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1610   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1611   for (i=0; i<mbs; i++) {
1612     rvals[0] = bs*(baij->rstart + i);
1613     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1614     for (j=ai[i]; j<ai[i+1]; j++) {
1615       col = baij->garray[aj[j]]*bs;
1616       for (k=0; k<bs; k++) {
1617         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1618         col++; a += bs;
1619       }
1620     }
1621   }
1622   ierr = PetscFree(rvals);CHKERRQ(ierr);
1623   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1624   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1625 
1626   if (matout) {
1627     *matout = B;
1628   } else {
1629     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1636 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1637 {
1638   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1639   Mat         a = baij->A,b = baij->B;
1640   int         ierr,s1,s2,s3;
1641 
1642   PetscFunctionBegin;
1643   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1644   if (rr) {
1645     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1646     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1647     /* Overlap communication with computation. */
1648     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1649   }
1650   if (ll) {
1651     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1652     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1653     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1654   }
1655   /* scale  the diagonal block */
1656   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1657 
1658   if (rr) {
1659     /* Do a scatter end and then right scale the off-diagonal block */
1660     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1661     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1662   }
1663 
1664   PetscFunctionReturn(0);
1665 }
1666 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the global (point) rows of A listed in is; if
   diag is non-null, the diagonal entry of each zeroed row is set to *diag.

   The rows in is may be owned by any process, so each requested row index is
   first routed to its owning process with non-blocking MPI point-to-point
   messages; each process then zeroes the rows it actually stores.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[2*j]   = number of rows this process sends to process j
     nprocs[2*j+1] = 1 if anything at all goes to process j            */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* owners[] holds block-row ranges; the indices are point rows, hence *bs */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack the outgoing row indices grouped by destination process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[]: the packing loop above advanced the entries */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first global point row owned by this process */
  base = owners[rank]*bs;

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert the received global point-row numbers to local row numbers */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* non-square diagonal block: zero first, then insert the diagonal values
       one at a time (requires that new nonzero locations are permitted) */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1817 int MatPrintHelp_MPIBAIJ(Mat A)
1818 {
1819   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1820   MPI_Comm    comm = A->comm;
1821   static int  called = 0;
1822   int         ierr;
1823 
1824   PetscFunctionBegin;
1825   if (!a->rank) {
1826     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1827   }
1828   if (called) {PetscFunctionReturn(0);} else called = 1;
1829   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1830   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1836 int MatSetUnfactored_MPIBAIJ(Mat A)
1837 {
1838   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1839   int         ierr;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatEqual_MPIBAIJ"
1850 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1851 {
1852   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1853   Mat         a,b,c,d;
1854   PetscTruth  flg;
1855   int         ierr;
1856 
1857   PetscFunctionBegin;
1858   a = matA->A; b = matA->B;
1859   c = matB->A; d = matB->B;
1860 
1861   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1862   if (flg == PETSC_TRUE) {
1863     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1864   }
1865   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1872 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1873 {
1874   int        ierr;
1875 
1876   PetscFunctionBegin;
1877   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 /* -------------------------------------------------------------------*/
1882 static struct _MatOps MatOps_Values = {
1883        MatSetValues_MPIBAIJ,
1884        MatGetRow_MPIBAIJ,
1885        MatRestoreRow_MPIBAIJ,
1886        MatMult_MPIBAIJ,
1887 /* 4*/ MatMultAdd_MPIBAIJ,
1888        MatMultTranspose_MPIBAIJ,
1889        MatMultTransposeAdd_MPIBAIJ,
1890        0,
1891        0,
1892        0,
1893 /*10*/ 0,
1894        0,
1895        0,
1896        0,
1897        MatTranspose_MPIBAIJ,
1898 /*15*/ MatGetInfo_MPIBAIJ,
1899        MatEqual_MPIBAIJ,
1900        MatGetDiagonal_MPIBAIJ,
1901        MatDiagonalScale_MPIBAIJ,
1902        MatNorm_MPIBAIJ,
1903 /*20*/ MatAssemblyBegin_MPIBAIJ,
1904        MatAssemblyEnd_MPIBAIJ,
1905        0,
1906        MatSetOption_MPIBAIJ,
1907        MatZeroEntries_MPIBAIJ,
1908 /*25*/ MatZeroRows_MPIBAIJ,
1909        0,
1910        0,
1911        0,
1912        0,
1913 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1914        0,
1915        0,
1916        0,
1917        0,
1918 /*35*/ MatDuplicate_MPIBAIJ,
1919        0,
1920        0,
1921        0,
1922        0,
1923 /*40*/ 0,
1924        MatGetSubMatrices_MPIBAIJ,
1925        MatIncreaseOverlap_MPIBAIJ,
1926        MatGetValues_MPIBAIJ,
1927        0,
1928 /*45*/ MatPrintHelp_MPIBAIJ,
1929        MatScale_MPIBAIJ,
1930        0,
1931        0,
1932        0,
1933 /*50*/ MatGetBlockSize_MPIBAIJ,
1934        0,
1935        0,
1936        0,
1937        0,
1938 /*55*/ 0,
1939        0,
1940        MatSetUnfactored_MPIBAIJ,
1941        0,
1942        MatSetValuesBlocked_MPIBAIJ,
1943 /*60*/ 0,
1944        MatDestroy_MPIBAIJ,
1945        MatView_MPIBAIJ,
1946        MatGetPetscMaps_Petsc,
1947        0,
1948 /*65*/ 0,
1949        0,
1950        0,
1951        0,
1952        0,
1953 /*70*/ MatGetRowMax_MPIBAIJ,
1954        0,
1955        0,
1956        0,
1957        0,
1958 /*75*/ 0,
1959        0,
1960        0,
1961        0,
1962        0,
1963 /*80*/ 0,
1964        0,
1965        0,
1966        0,
1967 /*85*/ MatLoad_MPIBAIJ
1968 };
1969 
1970 
1971 EXTERN_C_BEGIN
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1974 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1975 {
1976   PetscFunctionBegin;
1977   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1978   *iscopy = PETSC_FALSE;
1979   PetscFunctionReturn(0);
1980 }
1981 EXTERN_C_END
1982 
1983 EXTERN_C_BEGIN
1984 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
1985 EXTERN_C_END
1986 
1987 EXTERN_C_BEGIN
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1990 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1991 {
1992   Mat_MPIBAIJ  *b;
1993   int          ierr,i;
1994 
1995   PetscFunctionBegin;
1996   B->preallocated = PETSC_TRUE;
1997   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1998 
1999   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2000   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2001   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2002   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2003   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2004   if (d_nnz) {
2005   for (i=0; i<B->m/bs; i++) {
2006       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2007     }
2008   }
2009   if (o_nnz) {
2010     for (i=0; i<B->m/bs; i++) {
2011       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2012     }
2013   }
2014 
2015   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2016   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2017   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2018   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2019 
2020   b = (Mat_MPIBAIJ*)B->data;
2021   b->bs  = bs;
2022   b->bs2 = bs*bs;
2023   b->mbs = B->m/bs;
2024   b->nbs = B->n/bs;
2025   b->Mbs = B->M/bs;
2026   b->Nbs = B->N/bs;
2027 
2028   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2029   b->rowners[0]    = 0;
2030   for (i=2; i<=b->size; i++) {
2031     b->rowners[i] += b->rowners[i-1];
2032   }
2033   b->rstart    = b->rowners[b->rank];
2034   b->rend      = b->rowners[b->rank+1];
2035 
2036   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2037   b->cowners[0] = 0;
2038   for (i=2; i<=b->size; i++) {
2039     b->cowners[i] += b->cowners[i-1];
2040   }
2041   b->cstart    = b->cowners[b->rank];
2042   b->cend      = b->cowners[b->rank+1];
2043 
2044   for (i=0; i<=b->size; i++) {
2045     b->rowners_bs[i] = b->rowners[i]*bs;
2046   }
2047   b->rstart_bs = b->rstart*bs;
2048   b->rend_bs   = b->rend*bs;
2049   b->cstart_bs = b->cstart*bs;
2050   b->cend_bs   = b->cend*bs;
2051 
2052   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2053   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2054   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2055   PetscLogObjectParent(B,b->A);
2056   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2057   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2058   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2059   PetscLogObjectParent(B,b->B);
2060 
2061   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2062 
2063   PetscFunctionReturn(0);
2064 }
2065 EXTERN_C_END
2066 
2067 EXTERN_C_BEGIN
2068 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2069 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2070 EXTERN_C_END
2071 
2072 /*MC
2073    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2074 
2075    Options Database Keys:
2076 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2077 
2078   Level: beginner
2079 
2080 .seealso: MatCreateMPIBAIJ
2081 M*/
2082 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Type constructor for MATMPIBAIJ: allocates and zeros
   the Mat_MPIBAIJ data structure, installs the function table, creates the
   ownership arrays and the stash, and registers the composed type-specific
   operations.  Preallocation happens later in MatMPIBAIJSetPreallocation().
*/
int MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ  *b;
  int          ierr;
  PetscTruth   flg;

  PetscFunctionBegin;
  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  /* install the MATMPIBAIJ virtual function table */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation carved into three arrays: rowners, cowners, rowners_bs */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* optional hash-table based assembly, selected via the options database */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    /* reject degenerate factors; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the type-specific operations queried through the generic API */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2176 
2177 /*MC
2178    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2179 
2180    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2181    and MATMPIBAIJ otherwise.
2182 
2183    Options Database Keys:
2184 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2185 
2186   Level: beginner
2187 
2188 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2189 M*/
2190 
2191 EXTERN_C_BEGIN
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatCreate_BAIJ"
2194 int MatCreate_BAIJ(Mat A) {
2195   int ierr,size;
2196 
2197   PetscFunctionBegin;
2198   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2199   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2200   if (size == 1) {
2201     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2202   } else {
2203     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2204   }
2205   PetscFunctionReturn(0);
2206 }
2207 EXTERN_C_END
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2211 /*@C
2212    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2213    (block compressed row).  For good matrix assembly performance
2214    the user should preallocate the matrix storage by setting the parameters
2215    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2216    performance can be increased by more than a factor of 50.
2217 
2218    Collective on Mat
2219 
2220    Input Parameters:
2221 +  A - the matrix
.  bs   - size of block
2223 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2224            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2228 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2229            submatrix (same for all local rows).
2230 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2231            off-diagonal portion of the local submatrix (possibly different for
2232            each block row) or PETSC_NULL.
2233 
2234    Output Parameter:
2235 
2236 
2237    Options Database Keys:
2238 .   -mat_no_unroll - uses code that does not unroll the loops in the
2239                      block calculations (much slower)
2240 .   -mat_block_size - size of the blocks to use
2241 
2242    Notes:
   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2245 
2246    Storage Information:
2247    For a square global matrix we define each processor's diagonal portion
2248    to be its local rows and the corresponding columns (a square submatrix);
2249    each processor's off-diagonal portion encompasses the remainder of the
2250    local matrix (a rectangular submatrix).
2251 
2252    The user can specify preallocated storage for the diagonal part of
2253    the local submatrix with either d_nz or d_nnz (not both).  Set
2254    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2255    memory allocation.  Likewise, specify preallocated storage for the
2256    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2257 
2258    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2259    the figure below we depict these three local rows and all columns (0-11).
2260 
2261 .vb
2262            0 1 2 3 4 5 6 7 8 9 10 11
2263           -------------------
2264    row 3  |  o o o d d d o o o o o o
2265    row 4  |  o o o d d d o o o o o o
2266    row 5  |  o o o d d d o o o o o o
2267           -------------------
2268 .ve
2269 
2270    Thus, any entries in the d locations are stored in the d (diagonal)
2271    submatrix, and any entries in the o locations are stored in the
2272    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2273    stored simply in the MATSEQBAIJ format for compressed row storage.
2274 
2275    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2276    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2277    In general, for PDE problems in which most nonzeros are near the diagonal,
2278    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2279    or you will get TERRIBLE performance; see the users' manual chapter on
2280    matrices.
2281 
2282    Level: intermediate
2283 
2284 .keywords: matrix, block, aij, compressed row, sparse, parallel
2285 
2286 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2287 @*/
2288 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2289 {
2290   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2291 
2292   PetscFunctionBegin;
2293   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2294   if (f) {
2295     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2296   }
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatCreateMPIBAIJ"
2302 /*@C
2303    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2304    (block compressed row).  For good matrix assembly performance
2305    the user should preallocate the matrix storage by setting the parameters
2306    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2307    performance can be increased by more than a factor of 50.
2308 
2309    Collective on MPI_Comm
2310 
2311    Input Parameters:
2312 +  comm - MPI communicator
.  bs   - size of block
2314 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2315            This value should be the same as the local size used in creating the
2316            y vector for the matrix-vector product y = Ax.
2317 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2318            This value should be the same as the local size used in creating the
2319            x vector for the matrix-vector product y = Ax.
2320 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2321 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2322 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2323            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2327 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2328            submatrix (same for all local rows).
2329 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2330            off-diagonal portion of the local submatrix (possibly different for
2331            each block row) or PETSC_NULL.
2332 
2333    Output Parameter:
2334 .  A - the matrix
2335 
2336    Options Database Keys:
2337 .   -mat_no_unroll - uses code that does not unroll the loops in the
2338                      block calculations (much slower)
2339 .   -mat_block_size - size of the blocks to use
2340 
2341    Notes:
   A nonzero block is any block that has 1 or more nonzeros in it
2343 
2344    The user MUST specify either the local or global matrix dimensions
2345    (possibly both).
2346 
   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2349 
2350    Storage Information:
2351    For a square global matrix we define each processor's diagonal portion
2352    to be its local rows and the corresponding columns (a square submatrix);
2353    each processor's off-diagonal portion encompasses the remainder of the
2354    local matrix (a rectangular submatrix).
2355 
2356    The user can specify preallocated storage for the diagonal part of
2357    the local submatrix with either d_nz or d_nnz (not both).  Set
2358    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2359    memory allocation.  Likewise, specify preallocated storage for the
2360    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2361 
2362    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2363    the figure below we depict these three local rows and all columns (0-11).
2364 
2365 .vb
2366            0 1 2 3 4 5 6 7 8 9 10 11
2367           -------------------
2368    row 3  |  o o o d d d o o o o o o
2369    row 4  |  o o o d d d o o o o o o
2370    row 5  |  o o o d d d o o o o o o
2371           -------------------
2372 .ve
2373 
2374    Thus, any entries in the d locations are stored in the d (diagonal)
2375    submatrix, and any entries in the o locations are stored in the
2376    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2377    stored simply in the MATSEQBAIJ format for compressed row storage.
2378 
2379    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2380    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2381    In general, for PDE problems in which most nonzeros are near the diagonal,
2382    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2383    or you will get TERRIBLE performance; see the users' manual chapter on
2384    matrices.
2385 
2386    Level: intermediate
2387 
2388 .keywords: matrix, block, aij, compressed row, sparse, parallel
2389 
2390 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2391 @*/
2392 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2393 {
2394   int ierr,size;
2395 
2396   PetscFunctionBegin;
2397   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2398   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2399   if (size > 1) {
2400     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2401     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2402   } else {
2403     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2404     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2405   }
2406   PetscFunctionReturn(0);
2407 }
2408 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Creates a new MPIBAIJ matrix with the same layout
   (sizes, block size, ownership ranges, communication structures) as matin;
   cpvalues controls whether the numerical values of the A (diagonal) and
   B (off-diagonal) sequential blocks are copied by the MatDuplicate() calls
   at the bottom.
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  /* copy the full operations table so any overridden methods carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* replicate the block-size and block-dimension bookkeeping */
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* ownership ranges (block units and scalar _bs units) and misc flags;
     per-use scratch fields (rowindices, rowvalues, barray) start empty */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff: only the tuning parameters are inherited; the table
     itself is rebuilt lazily (counters reset) */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  /* fresh stashes: scalar (block size 1) and block (block size bs) */
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray maps local off-diagonal block columns to global block columns;
     its length is the number of block columns of the B part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  /* duplicate the communication machinery for MatMult etc. */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);

  /* duplicate the local diagonal (A) and off-diagonal (B) blocks */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
2495 
2496 #include "petscsys.h"
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatLoad_MPIBAIJ"
2500 int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2501 {
2502   Mat          A;
2503   int          i,nz,ierr,j,rstart,rend,fd;
2504   PetscScalar  *vals,*buf;
2505   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2506   MPI_Status   status;
2507   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2508   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2509   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2510   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2511   int          dcount,kmax,k,nzcount,tmp;
2512 
2513   PetscFunctionBegin;
2514   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2515 
2516   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2517   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2518   if (!rank) {
2519     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2520     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2521     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2522     if (header[3] < 0) {
2523       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2524     }
2525   }
2526 
2527   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2528   M = header[1]; N = header[2];
2529 
2530   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2531 
2532   /*
2533      This code adds extra rows to make sure the number of rows is
2534      divisible by the blocksize
2535   */
2536   Mbs        = M/bs;
2537   extra_rows = bs - M + bs*(Mbs);
2538   if (extra_rows == bs) extra_rows = 0;
2539   else                  Mbs++;
2540   if (extra_rows &&!rank) {
2541     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2542   }
2543 
2544   /* determine ownership of all rows */
2545   mbs        = Mbs/size + ((Mbs % size) > rank);
2546   m          = mbs*bs;
2547   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2548   browners   = rowners + size + 1;
2549   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2550   rowners[0] = 0;
2551   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2552   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2553   rstart = rowners[rank];
2554   rend   = rowners[rank+1];
2555 
2556   /* distribute row lengths to all processors */
2557   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2558   if (!rank) {
2559     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2560     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2561     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2562     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2563     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2564     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2565     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2566   } else {
2567     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2568   }
2569 
2570   if (!rank) {
2571     /* calculate the number of nonzeros on each processor */
2572     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2573     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2574     for (i=0; i<size; i++) {
2575       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2576         procsnz[i] += rowlengths[j];
2577       }
2578     }
2579     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2580 
2581     /* determine max buffer needed and allocate it */
2582     maxnz = 0;
2583     for (i=0; i<size; i++) {
2584       maxnz = PetscMax(maxnz,procsnz[i]);
2585     }
2586     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2587 
2588     /* read in my part of the matrix column indices  */
2589     nz     = procsnz[0];
2590     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2591     mycols = ibuf;
2592     if (size == 1)  nz -= extra_rows;
2593     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2594     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2595 
2596     /* read in every ones (except the last) and ship off */
2597     for (i=1; i<size-1; i++) {
2598       nz   = procsnz[i];
2599       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2600       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2601     }
2602     /* read in the stuff for the last proc */
2603     if (size != 1) {
2604       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2605       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2606       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2607       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2608     }
2609     ierr = PetscFree(cols);CHKERRQ(ierr);
2610   } else {
2611     /* determine buffer space needed for message */
2612     nz = 0;
2613     for (i=0; i<m; i++) {
2614       nz += locrowlens[i];
2615     }
2616     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2617     mycols = ibuf;
2618     /* receive message of column indices*/
2619     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2620     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2621     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2622   }
2623 
2624   /* loop over local rows, determining number of off diagonal entries */
2625   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2626   odlens   = dlens + (rend-rstart);
2627   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2628   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2629   masked1  = mask    + Mbs;
2630   masked2  = masked1 + Mbs;
2631   rowcount = 0; nzcount = 0;
2632   for (i=0; i<mbs; i++) {
2633     dcount  = 0;
2634     odcount = 0;
2635     for (j=0; j<bs; j++) {
2636       kmax = locrowlens[rowcount];
2637       for (k=0; k<kmax; k++) {
2638         tmp = mycols[nzcount++]/bs;
2639         if (!mask[tmp]) {
2640           mask[tmp] = 1;
2641           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2642           else masked1[dcount++] = tmp;
2643         }
2644       }
2645       rowcount++;
2646     }
2647 
2648     dlens[i]  = dcount;
2649     odlens[i] = odcount;
2650 
2651     /* zero out the mask elements we set */
2652     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2653     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2654   }
2655 
2656   /* create our matrix */
2657   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2658   ierr = MatSetType(A,type);CHKERRQ(ierr)
2659   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2660 
2661   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2662   MatSetOption(A,MAT_COLUMNS_SORTED);
2663 
2664   if (!rank) {
2665     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2666     /* read in my part of the matrix numerical values  */
2667     nz = procsnz[0];
2668     vals = buf;
2669     mycols = ibuf;
2670     if (size == 1)  nz -= extra_rows;
2671     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2672     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2673 
2674     /* insert into matrix */
2675     jj      = rstart*bs;
2676     for (i=0; i<m; i++) {
2677       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2678       mycols += locrowlens[i];
2679       vals   += locrowlens[i];
2680       jj++;
2681     }
2682     /* read in other processors (except the last one) and ship out */
2683     for (i=1; i<size-1; i++) {
2684       nz   = procsnz[i];
2685       vals = buf;
2686       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2687       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2688     }
2689     /* the last proc */
2690     if (size != 1){
2691       nz   = procsnz[i] - extra_rows;
2692       vals = buf;
2693       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2694       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2695       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2696     }
2697     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2698   } else {
2699     /* receive numeric values */
2700     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2701 
2702     /* receive message of values*/
2703     vals   = buf;
2704     mycols = ibuf;
2705     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2706     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2707     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2708 
2709     /* insert into matrix */
2710     jj      = rstart*bs;
2711     for (i=0; i<m; i++) {
2712       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2713       mycols += locrowlens[i];
2714       vals   += locrowlens[i];
2715       jj++;
2716     }
2717   }
2718   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2719   ierr = PetscFree(buf);CHKERRQ(ierr);
2720   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2721   ierr = PetscFree(rowners);CHKERRQ(ierr);
2722   ierr = PetscFree(dlens);CHKERRQ(ierr);
2723   ierr = PetscFree(mask);CHKERRQ(ierr);
2724   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2726 
2727   *newmat = A;
2728   PetscFunctionReturn(0);
2729 }
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2733 /*@
2734    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2735 
   Input Parameters:
+  mat  - the matrix
-  fact - factor
2739 
2740    Collective on Mat
2741 
2742    Level: advanced
2743 
2744   Notes:
2745    This can also be set by the command line option: -mat_use_hash_table fact
2746 
2747 .keywords: matrix, hashtable, factor, HT
2748 
2749 .seealso: MatSetOption()
2750 @*/
2751 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2752 {
2753   int ierr,(*f)(Mat,PetscReal);
2754 
2755   PetscFunctionBegin;
2756   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2757   if (f) {
2758     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2759   }
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 #undef __FUNCT__
2764 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2765 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2766 {
2767   Mat_MPIBAIJ *baij;
2768 
2769   PetscFunctionBegin;
2770   baij = (Mat_MPIBAIJ*)mat->data;
2771   baij->ht_fact = fact;
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 #undef __FUNCT__
2776 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2777 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2778 {
2779   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2780   PetscFunctionBegin;
2781   *Ad     = a->A;
2782   *Ao     = a->B;
2783   *colmap = a->garray;
2784   PetscFunctionReturn(0);
2785 }
2786