xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4482741e5b2e2bbc854fb1f8dba65221386520f2)
1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN int DisAssemble_MPIBAIJ(Mat);
7 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
8 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
10 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
11 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
12 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
14 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
15 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
16 
17 /*  UGLY, ugly, ugly
18    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
19    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
20    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
21    converts the entries into single precision and then calls ..._MatScalar() to put them
22    into the single precision data structures.
23 */
24 #if defined(PETSC_USE_MAT_SINGLE)
25 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
26 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
29 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
30 #else
31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
32 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
34 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
36 #endif
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
41 {
42   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
43   int          ierr,i;
44   PetscScalar  *va,*vb;
45   Vec          vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 int MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
73   int         ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 int MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
88   int         ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 /*
98      Local utility routine that creates a mapping from the global column
99    number to the local number in the off-diagonal part of the local
100    storage of the matrix.  This is done in a non scable way since the
101    length of colmap equals the global matrix length.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105 int CreateColmap_MPIBAIJ_Private(Mat mat)
106 {
107   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
108   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
109   int         nbs = B->nbs,i,bs=B->bs,ierr;
110 
111   PetscFunctionBegin;
112 #if defined (PETSC_USE_CTABLE)
113   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
114   for (i=0; i<nbs; i++){
115     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
116   }
117 #else
118   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
119   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
120   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
121   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
122 #endif
123   PetscFunctionReturn(0);
124 }
125 
126 #define CHUNKSIZE  10
127 
128 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
129 { \
130  \
131     brow = row/bs;  \
132     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
133     rmax = aimax[brow]; nrow = ailen[brow]; \
134       bcol = col/bs; \
135       ridx = row % bs; cidx = col % bs; \
136       low = 0; high = nrow; \
137       while (high-low > 3) { \
138         t = (low+high)/2; \
139         if (rp[t] > bcol) high = t; \
140         else              low  = t; \
141       } \
142       for (_i=low; _i<high; _i++) { \
143         if (rp[_i] > bcol) break; \
144         if (rp[_i] == bcol) { \
145           bap  = ap +  bs2*_i + bs*cidx + ridx; \
146           if (addv == ADD_VALUES) *bap += value;  \
147           else                    *bap  = value;  \
148           goto a_noinsert; \
149         } \
150       } \
151       if (a->nonew == 1) goto a_noinsert; \
152       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
153       if (nrow >= rmax) { \
154         /* there is no extra room in row, therefore enlarge */ \
155         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
156         MatScalar *new_a; \
157  \
158         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
159  \
160         /* malloc new storage space */ \
161         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
162         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
163         new_j   = (int*)(new_a + bs2*new_nz); \
164         new_i   = new_j + new_nz; \
165  \
166         /* copy over old data into new slots */ \
167         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
168         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
169         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
170         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
171         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
172         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
173         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
174         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
175                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
176         /* free up old matrix storage */ \
177         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
178         if (!a->singlemalloc) { \
179           ierr = PetscFree(a->i);CHKERRQ(ierr); \
180           ierr = PetscFree(a->j);CHKERRQ(ierr);\
181         } \
182         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
183         a->singlemalloc = PETSC_TRUE; \
184  \
185         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
186         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
187         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
188         a->maxnz += bs2*CHUNKSIZE; \
189         a->reallocs++; \
190         a->nz++; \
191       } \
192       N = nrow++ - 1;  \
193       /* shift up all the later entries in this row */ \
194       for (ii=N; ii>=_i; ii--) { \
195         rp[ii+1] = rp[ii]; \
196         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
197       } \
198       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
199       rp[_i]                      = bcol;  \
200       ap[bs2*_i + bs*cidx + ridx] = value;  \
201       a_noinsert:; \
202     ailen[brow] = nrow; \
203 }
204 
205 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
206 { \
207     brow = row/bs;  \
208     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
209     rmax = bimax[brow]; nrow = bilen[brow]; \
210       bcol = col/bs; \
211       ridx = row % bs; cidx = col % bs; \
212       low = 0; high = nrow; \
213       while (high-low > 3) { \
214         t = (low+high)/2; \
215         if (rp[t] > bcol) high = t; \
216         else              low  = t; \
217       } \
218       for (_i=low; _i<high; _i++) { \
219         if (rp[_i] > bcol) break; \
220         if (rp[_i] == bcol) { \
221           bap  = ap +  bs2*_i + bs*cidx + ridx; \
222           if (addv == ADD_VALUES) *bap += value;  \
223           else                    *bap  = value;  \
224           goto b_noinsert; \
225         } \
226       } \
227       if (b->nonew == 1) goto b_noinsert; \
228       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
229       if (nrow >= rmax) { \
230         /* there is no extra room in row, therefore enlarge */ \
231         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
232         MatScalar *new_a; \
233  \
234         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
235  \
236         /* malloc new storage space */ \
237         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
238         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
239         new_j   = (int*)(new_a + bs2*new_nz); \
240         new_i   = new_j + new_nz; \
241  \
242         /* copy over old data into new slots */ \
243         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
244         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
245         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
246         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
247         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
248         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
249         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
251                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
252         /* free up old matrix storage */ \
253         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
254         if (!b->singlemalloc) { \
255           ierr = PetscFree(b->i);CHKERRQ(ierr); \
256           ierr = PetscFree(b->j);CHKERRQ(ierr); \
257         } \
258         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
259         b->singlemalloc = PETSC_TRUE; \
260  \
261         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
262         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
263         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
264         b->maxnz += bs2*CHUNKSIZE; \
265         b->reallocs++; \
266         b->nz++; \
267       } \
268       N = nrow++ - 1;  \
269       /* shift up all the later entries in this row */ \
270       for (ii=N; ii>=_i; ii--) { \
271         rp[ii+1] = rp[ii]; \
272         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
273       } \
274       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
275       rp[_i]                      = bcol;  \
276       ap[bs2*_i + bs*cidx + ridx] = value;  \
277       b_noinsert:; \
278     bilen[brow] = nrow; \
279 }
280 
281 #if defined(PETSC_USE_MAT_SINGLE)
282 #undef __FUNCT__
283 #define __FUNCT__ "MatSetValues_MPIBAIJ"
284 int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
285 {
286   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
287   int         ierr,i,N = m*n;
288   MatScalar   *vsingle;
289 
290   PetscFunctionBegin;
291   if (N > b->setvalueslen) {
292     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
293     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
294     b->setvalueslen  = N;
295   }
296   vsingle = b->setvaluescopy;
297 
298   for (i=0; i<N; i++) {
299     vsingle[i] = v[i];
300   }
301   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
307 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
308 {
309   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
310   int         ierr,i,N = m*n*b->bs2;
311   MatScalar   *vsingle;
312 
313   PetscFunctionBegin;
314   if (N > b->setvalueslen) {
315     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
316     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
317     b->setvalueslen  = N;
318   }
319   vsingle = b->setvaluescopy;
320   for (i=0; i<N; i++) {
321     vsingle[i] = v[i];
322   }
323   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
329 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
330 {
331   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
332   int         ierr,i,N = m*n;
333   MatScalar   *vsingle;
334 
335   PetscFunctionBegin;
336   if (N > b->setvalueslen) {
337     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
338     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
339     b->setvalueslen  = N;
340   }
341   vsingle = b->setvaluescopy;
342   for (i=0; i<N; i++) {
343     vsingle[i] = v[i];
344   }
345   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
346   PetscFunctionReturn(0);
347 }
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
351 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
352 {
353   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
354   int         ierr,i,N = m*n*b->bs2;
355   MatScalar   *vsingle;
356 
357   PetscFunctionBegin;
358   if (N > b->setvalueslen) {
359     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
360     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
361     b->setvalueslen  = N;
362   }
363   vsingle = b->setvaluescopy;
364   for (i=0; i<N; i++) {
365     vsingle[i] = v[i];
366   }
367   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
368   PetscFunctionReturn(0);
369 }
370 #endif
371 
372 #undef __FUNCT__
373 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
374 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
375 {
376   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
377   MatScalar   value;
378   PetscTruth  roworiented = baij->roworiented;
379   int         ierr,i,j,row,col;
380   int         rstart_orig=baij->rstart_bs;
381   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
382   int         cend_orig=baij->cend_bs,bs=baij->bs;
383 
384   /* Some Variables required in the macro */
385   Mat         A = baij->A;
386   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
387   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
388   MatScalar   *aa=a->a;
389 
390   Mat         B = baij->B;
391   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
392   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
393   MatScalar   *ba=b->a;
394 
395   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
396   int         low,high,t,ridx,cidx,bs2=a->bs2;
397   MatScalar   *ap,*bap;
398 
399   PetscFunctionBegin;
400   for (i=0; i<m; i++) {
401     if (im[i] < 0) continue;
402 #if defined(PETSC_USE_BOPT_g)
403     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
404 #endif
405     if (im[i] >= rstart_orig && im[i] < rend_orig) {
406       row = im[i] - rstart_orig;
407       for (j=0; j<n; j++) {
408         if (in[j] >= cstart_orig && in[j] < cend_orig){
409           col = in[j] - cstart_orig;
410           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
412           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
413         } else if (in[j] < 0) continue;
414 #if defined(PETSC_USE_BOPT_g)
415         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
416 #endif
417         else {
418           if (mat->was_assembled) {
419             if (!baij->colmap) {
420               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
421             }
422 #if defined (PETSC_USE_CTABLE)
423             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
424             col  = col - 1;
425 #else
426             col = baij->colmap[in[j]/bs] - 1;
427 #endif
428             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
429               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
430               col =  in[j];
431               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
432               B = baij->B;
433               b = (Mat_SeqBAIJ*)(B)->data;
434               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
435               ba=b->a;
436             } else col += in[j]%bs;
437           } else col = in[j];
438           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
439           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
440           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
441         }
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
458 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
459 {
460   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
461   const MatScalar *value;
462   MatScalar       *barray=baij->barray;
463   PetscTruth      roworiented = baij->roworiented;
464   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
465   int             rend=baij->rend,cstart=baij->cstart,stepval;
466   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
467 
468   PetscFunctionBegin;
469   if(!barray) {
470     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
471     baij->barray = barray;
472   }
473 
474   if (roworiented) {
475     stepval = (n-1)*bs;
476   } else {
477     stepval = (m-1)*bs;
478   }
479   for (i=0; i<m; i++) {
480     if (im[i] < 0) continue;
481 #if defined(PETSC_USE_BOPT_g)
482     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
483 #endif
484     if (im[i] >= rstart && im[i] < rend) {
485       row = im[i] - rstart;
486       for (j=0; j<n; j++) {
487         /* If NumCol = 1 then a copy is not required */
488         if ((roworiented) && (n == 1)) {
489           barray = (MatScalar*)v + i*bs2;
490         } else if((!roworiented) && (m == 1)) {
491           barray = (MatScalar*)v + j*bs2;
492         } else { /* Here a copy is required */
493           if (roworiented) {
494             value = v + i*(stepval+bs)*bs + j*bs;
495           } else {
496             value = v + j*(stepval+bs)*bs + i*bs;
497           }
498           for (ii=0; ii<bs; ii++,value+=stepval) {
499             for (jj=0; jj<bs; jj++) {
500               *barray++  = *value++;
501             }
502           }
503           barray -=bs2;
504         }
505 
506         if (in[j] >= cstart && in[j] < cend){
507           col  = in[j] - cstart;
508           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509         }
510         else if (in[j] < 0) continue;
511 #if defined(PETSC_USE_BOPT_g)
512         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
513 #endif
514         else {
515           if (mat->was_assembled) {
516             if (!baij->colmap) {
517               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
518             }
519 
520 #if defined(PETSC_USE_BOPT_g)
521 #if defined (PETSC_USE_CTABLE)
522             { int data;
523               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
524               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
525             }
526 #else
527             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
528 #endif
529 #endif
530 #if defined (PETSC_USE_CTABLE)
531 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
532             col  = (col - 1)/bs;
533 #else
534             col = (baij->colmap[in[j]] - 1)/bs;
535 #endif
536             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
538               col =  in[j];
539             }
540           }
541           else col = in[j];
542           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
543         }
544       }
545     } else {
546       if (!baij->donotstash) {
547         if (roworiented) {
548           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555   PetscFunctionReturn(0);
556 }
557 
558 #define HASH_KEY 0.6180339887
559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562 #undef __FUNCT__
563 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
564 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
565 {
566   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567   PetscTruth  roworiented = baij->roworiented;
568   int         ierr,i,j,row,col;
569   int         rstart_orig=baij->rstart_bs;
570   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
571   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
572   PetscReal   tmp;
573   MatScalar   **HD = baij->hd,value;
574 #if defined(PETSC_USE_BOPT_g)
575   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
576 #endif
577 
578   PetscFunctionBegin;
579 
580   for (i=0; i<m; i++) {
581 #if defined(PETSC_USE_BOPT_g)
582     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
583     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
584 #endif
585       row = im[i];
586     if (row >= rstart_orig && row < rend_orig) {
587       for (j=0; j<n; j++) {
588         col = in[j];
589         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
590         /* Look up into the Hash Table */
591         key = (row/bs)*Nbs+(col/bs)+1;
592         h1  = HASH(size,key,tmp);
593 
594 
595         idx = h1;
596 #if defined(PETSC_USE_BOPT_g)
597         insert_ct++;
598         total_ct++;
599         if (HT[idx] != key) {
600           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
601           if (idx == size) {
602             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
603             if (idx == h1) {
604               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
605             }
606           }
607         }
608 #else
609         if (HT[idx] != key) {
610           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
611           if (idx == size) {
612             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
613             if (idx == h1) {
614               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
615             }
616           }
617         }
618 #endif
619         /* A HASH table entry is found, so insert the values at the correct address */
620         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
621         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
622       }
623     } else {
624       if (!baij->donotstash) {
625         if (roworiented) {
626           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
627         } else {
628           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
629         }
630       }
631     }
632   }
633 #if defined(PETSC_USE_BOPT_g)
634   baij->ht_total_ct = total_ct;
635   baij->ht_insert_ct = insert_ct;
636 #endif
637   PetscFunctionReturn(0);
638 }
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
642 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
643 {
644   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
645   PetscTruth      roworiented = baij->roworiented;
646   int             ierr,i,j,ii,jj,row,col;
647   int             rstart=baij->rstart ;
648   int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
649   int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
650   PetscReal       tmp;
651   MatScalar       **HD = baij->hd,*baij_a;
652   const MatScalar *v_t,*value;
653 #if defined(PETSC_USE_BOPT_g)
654   int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
655 #endif
656 
657   PetscFunctionBegin;
658 
659   if (roworiented) {
660     stepval = (n-1)*bs;
661   } else {
662     stepval = (m-1)*bs;
663   }
664   for (i=0; i<m; i++) {
665 #if defined(PETSC_USE_BOPT_g)
666     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
667     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
668 #endif
669     row   = im[i];
670     v_t   = v + i*bs2;
671     if (row >= rstart && row < rend) {
672       for (j=0; j<n; j++) {
673         col = in[j];
674 
675         /* Look up into the Hash Table */
676         key = row*Nbs+col+1;
677         h1  = HASH(size,key,tmp);
678 
679         idx = h1;
680 #if defined(PETSC_USE_BOPT_g)
681         total_ct++;
682         insert_ct++;
683        if (HT[idx] != key) {
684           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
685           if (idx == size) {
686             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
687             if (idx == h1) {
688               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
689             }
690           }
691         }
692 #else
693         if (HT[idx] != key) {
694           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
695           if (idx == size) {
696             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
697             if (idx == h1) {
698               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
699             }
700           }
701         }
702 #endif
703         baij_a = HD[idx];
704         if (roworiented) {
705           /*value = v + i*(stepval+bs)*bs + j*bs;*/
706           /* value = v + (i*(stepval+bs)+j)*bs; */
707           value = v_t;
708           v_t  += bs;
709           if (addv == ADD_VALUES) {
710             for (ii=0; ii<bs; ii++,value+=stepval) {
711               for (jj=ii; jj<bs2; jj+=bs) {
712                 baij_a[jj]  += *value++;
713               }
714             }
715           } else {
716             for (ii=0; ii<bs; ii++,value+=stepval) {
717               for (jj=ii; jj<bs2; jj+=bs) {
718                 baij_a[jj]  = *value++;
719               }
720             }
721           }
722         } else {
723           value = v + j*(stepval+bs)*bs + i*bs;
724           if (addv == ADD_VALUES) {
725             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
726               for (jj=0; jj<bs; jj++) {
727                 baij_a[jj]  += *value++;
728               }
729             }
730           } else {
731             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
732               for (jj=0; jj<bs; jj++) {
733                 baij_a[jj]  = *value++;
734               }
735             }
736           }
737         }
738       }
739     } else {
740       if (!baij->donotstash) {
741         if (roworiented) {
742           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
743         } else {
744           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
745         }
746       }
747     }
748   }
749 #if defined(PETSC_USE_BOPT_g)
750   baij->ht_total_ct = total_ct;
751   baij->ht_insert_ct = insert_ct;
752 #endif
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatGetValues_MPIBAIJ"
758 int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
759 {
760   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
761   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
762   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
763 
764   PetscFunctionBegin;
765   for (i=0; i<m; i++) {
766     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
767     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
768     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
769       row = idxm[i] - bsrstart;
770       for (j=0; j<n; j++) {
771         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
772         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
773         if (idxn[j] >= bscstart && idxn[j] < bscend){
774           col = idxn[j] - bscstart;
775           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
776         } else {
777           if (!baij->colmap) {
778             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
779           }
780 #if defined (PETSC_USE_CTABLE)
781           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
782           data --;
783 #else
784           data = baij->colmap[idxn[j]/bs]-1;
785 #endif
786           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
787           else {
788             col  = data + idxn[j]%bs;
789             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
790           }
791         }
792       }
793     } else {
794       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
795     }
796   }
797  PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatNorm_MPIBAIJ"
802 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
803 {
804   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
805   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
806   int        ierr,i,bs2=baij->bs2;
807   PetscReal  sum = 0.0;
808   MatScalar  *v;
809 
810   PetscFunctionBegin;
811   if (baij->size == 1) {
812     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
813   } else {
814     if (type == NORM_FROBENIUS) {
815       v = amat->a;
816       for (i=0; i<amat->nz*bs2; i++) {
817 #if defined(PETSC_USE_COMPLEX)
818         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
819 #else
820         sum += (*v)*(*v); v++;
821 #endif
822       }
823       v = bmat->a;
824       for (i=0; i<bmat->nz*bs2; i++) {
825 #if defined(PETSC_USE_COMPLEX)
826         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
827 #else
828         sum += (*v)*(*v); v++;
829 #endif
830       }
831       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
832       *nrm = sqrt(*nrm);
833     } else {
834       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
835     }
836   }
837   PetscFunctionReturn(0);
838 }
839 
840 
841 /*
842   Creates the hash table, and sets the table
843   This table is created only once.
844   If new entried need to be added to the matrix
845   then the hash table has to be destroyed and
846   recreated.
847 */
848 #undef __FUNCT__
849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851 {
852   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
853   Mat         A = baij->A,B=baij->B;
854   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
857   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
858   int         *HT,key;
859   MatScalar   **HD;
860   PetscReal   tmp;
861 #if defined(PETSC_USE_BOPT_g)
862   int         ct=0,max=0;
863 #endif
864 
865   PetscFunctionBegin;
866   baij->ht_size=(int)(factor*nz);
867   size = baij->ht_size;
868 
869   if (baij->ht) {
870     PetscFunctionReturn(0);
871   }
872 
873   /* Allocate Memory for Hash Table */
874   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
875   baij->ht = (int*)(baij->hd + size);
876   HD       = baij->hd;
877   HT       = baij->ht;
878 
879 
880   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
881 
882 
883   /* Loop Over A */
884   for (i=0; i<a->mbs; i++) {
885     for (j=ai[i]; j<ai[i+1]; j++) {
886       row = i+rstart;
887       col = aj[j]+cstart;
888 
889       key = row*Nbs + col + 1;
890       h1  = HASH(size,key,tmp);
891       for (k=0; k<size; k++){
892         if (HT[(h1+k)%size] == 0.0) {
893           HT[(h1+k)%size] = key;
894           HD[(h1+k)%size] = a->a + j*bs2;
895           break;
896 #if defined(PETSC_USE_BOPT_g)
897         } else {
898           ct++;
899 #endif
900         }
901       }
902 #if defined(PETSC_USE_BOPT_g)
903       if (k> max) max = k;
904 #endif
905     }
906   }
907   /* Loop Over B */
908   for (i=0; i<b->mbs; i++) {
909     for (j=bi[i]; j<bi[i+1]; j++) {
910       row = i+rstart;
911       col = garray[bj[j]];
912       key = row*Nbs + col + 1;
913       h1  = HASH(size,key,tmp);
914       for (k=0; k<size; k++){
915         if (HT[(h1+k)%size] == 0.0) {
916           HT[(h1+k)%size] = key;
917           HD[(h1+k)%size] = b->a + j*bs2;
918           break;
919 #if defined(PETSC_USE_BOPT_g)
920         } else {
921           ct++;
922 #endif
923         }
924       }
925 #if defined(PETSC_USE_BOPT_g)
926       if (k> max) max = k;
927 #endif
928     }
929   }
930 
931   /* Print Summary */
932 #if defined(PETSC_USE_BOPT_g)
933   for (i=0,j=0; i<size; i++) {
934     if (HT[i]) {j++;}
935   }
936   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
937 #endif
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
943 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
944 {
945   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
946   int         ierr,nstash,reallocs;
947   InsertMode  addv;
948 
949   PetscFunctionBegin;
950   if (baij->donotstash) {
951     PetscFunctionReturn(0);
952   }
953 
954   /* make sure all processors are either in INSERTMODE or ADDMODE */
955   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
956   if (addv == (ADD_VALUES|INSERT_VALUES)) {
957     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
958   }
959   mat->insertmode = addv; /* in case this processor had no cache */
960 
961   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
962   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
963   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
964   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
965   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
966   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
967   PetscFunctionReturn(0);
968 }
969 
970 #undef __FUNCT__
971 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
972 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
973 {
974   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
975   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
976   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
977   int         *row,*col,other_disassembled;
978   PetscTruth  r1,r2,r3;
979   MatScalar   *val;
980   InsertMode  addv = mat->insertmode;
981 
982   PetscFunctionBegin;
983   if (!baij->donotstash) {
984     while (1) {
985       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
986       if (!flg) break;
987 
988       for (i=0; i<n;) {
989         /* Now identify the consecutive vals belonging to the same row */
990         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
991         if (j < n) ncols = j-i;
992         else       ncols = n-i;
993         /* Now assemble all these values with a single function call */
994         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
995         i = j;
996       }
997     }
998     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
999     /* Now process the block-stash. Since the values are stashed column-oriented,
1000        set the roworiented flag to column oriented, and after MatSetValues()
1001        restore the original flags */
1002     r1 = baij->roworiented;
1003     r2 = a->roworiented;
1004     r3 = b->roworiented;
1005     baij->roworiented = PETSC_FALSE;
1006     a->roworiented    = PETSC_FALSE;
1007     b->roworiented    = PETSC_FALSE;
1008     while (1) {
1009       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1010       if (!flg) break;
1011 
1012       for (i=0; i<n;) {
1013         /* Now identify the consecutive vals belonging to the same row */
1014         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1015         if (j < n) ncols = j-i;
1016         else       ncols = n-i;
1017         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1018         i = j;
1019       }
1020     }
1021     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1022     baij->roworiented = r1;
1023     a->roworiented    = r2;
1024     b->roworiented    = r3;
1025   }
1026 
1027   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1028   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1029 
1030   /* determine if any processor has disassembled, if so we must
1031      also disassemble ourselfs, in order that we may reassemble. */
1032   /*
1033      if nonzero structure of submatrix B cannot change then we know that
1034      no processor disassembled thus we can skip this stuff
1035   */
1036   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1037     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1038     if (mat->was_assembled && !other_disassembled) {
1039       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1040     }
1041   }
1042 
1043   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1044     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1045   }
1046   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1047   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1048 
1049 #if defined(PETSC_USE_BOPT_g)
1050   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1051     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1052     baij->ht_total_ct  = 0;
1053     baij->ht_insert_ct = 0;
1054   }
1055 #endif
1056   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1057     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1058     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1059     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1060   }
1061 
1062   if (baij->rowvalues) {
1063     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1064     baij->rowvalues = 0;
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1071 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1072 {
1073   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1074   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
1075   PetscTruth        isascii,isdraw;
1076   PetscViewer       sviewer;
1077   PetscViewerFormat format;
1078 
1079   PetscFunctionBegin;
1080   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1081   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1082   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1083   if (isascii) {
1084     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1085     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1086       MatInfo info;
1087       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1088       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1089       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1090               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1091               baij->bs,(int)info.memory);CHKERRQ(ierr);
1092       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1093       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1094       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1095       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1096       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1097       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1098       PetscFunctionReturn(0);
1099     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1100       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1101       PetscFunctionReturn(0);
1102     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1103       PetscFunctionReturn(0);
1104     }
1105   }
1106 
1107   if (isdraw) {
1108     PetscDraw       draw;
1109     PetscTruth isnull;
1110     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1111     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1112   }
1113 
1114   if (size == 1) {
1115     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1116     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1117   } else {
1118     /* assemble the entire matrix onto first processor. */
1119     Mat         A;
1120     Mat_SeqBAIJ *Aloc;
1121     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1122     MatScalar   *a;
1123 
1124     if (!rank) {
1125       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1126     } else {
1127       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1128     }
1129     PetscLogObjectParent(mat,A);
1130 
1131     /* copy over the A part */
1132     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1133     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1134     ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1135 
1136     for (i=0; i<mbs; i++) {
1137       rvals[0] = bs*(baij->rstart + i);
1138       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1139       for (j=ai[i]; j<ai[i+1]; j++) {
1140         col = (baij->cstart+aj[j])*bs;
1141         for (k=0; k<bs; k++) {
1142           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1143           col++; a += bs;
1144         }
1145       }
1146     }
1147     /* copy over the B part */
1148     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1149     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1150     for (i=0; i<mbs; i++) {
1151       rvals[0] = bs*(baij->rstart + i);
1152       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1153       for (j=ai[i]; j<ai[i+1]; j++) {
1154         col = baij->garray[aj[j]]*bs;
1155         for (k=0; k<bs; k++) {
1156           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1157           col++; a += bs;
1158         }
1159       }
1160     }
1161     ierr = PetscFree(rvals);CHKERRQ(ierr);
1162     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1163     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1164     /*
1165        Everyone has to call to draw the matrix since the graphics waits are
1166        synchronized across all processors that share the PetscDraw object
1167     */
1168     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169     if (!rank) {
1170       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1171       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1172     }
1173     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1174     ierr = MatDestroy(A);CHKERRQ(ierr);
1175   }
1176   PetscFunctionReturn(0);
1177 }
1178 
1179 #undef __FUNCT__
1180 #define __FUNCT__ "MatView_MPIBAIJ"
1181 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1182 {
1183   int        ierr;
1184   PetscTruth isascii,isdraw,issocket,isbinary;
1185 
1186   PetscFunctionBegin;
1187   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1188   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1189   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1190   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1191   if (isascii || isdraw || issocket || isbinary) {
1192     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1193   } else {
1194     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1201 int MatDestroy_MPIBAIJ(Mat mat)
1202 {
1203   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1204   int         ierr;
1205 
1206   PetscFunctionBegin;
1207 #if defined(PETSC_USE_LOG)
1208   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
1209 #endif
1210   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1211   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1212   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1213   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1214   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1215 #if defined (PETSC_USE_CTABLE)
1216   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1217 #else
1218   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1219 #endif
1220   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1221   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1222   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1223   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1224   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1225   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1226 #if defined(PETSC_USE_MAT_SINGLE)
1227   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1228 #endif
1229   ierr = PetscFree(baij);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatMult_MPIBAIJ"
1235 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1236 {
1237   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1238   int         ierr,nt;
1239 
1240   PetscFunctionBegin;
1241   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1242   if (nt != A->n) {
1243     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1244   }
1245   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1246   if (nt != A->m) {
1247     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1248   }
1249   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1250   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1251   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1252   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1253   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1259 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1260 {
1261   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1262   int        ierr;
1263 
1264   PetscFunctionBegin;
1265   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1266   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1267   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1268   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1274 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1275 {
1276   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1277   int         ierr;
1278 
1279   PetscFunctionBegin;
1280   /* do nondiagonal part */
1281   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1282   /* send it on its way */
1283   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1284   /* do local part */
1285   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1286   /* receive remote parts: note this assumes the values are not actually */
1287   /* inserted in yy until the next line, which is true for my implementation*/
1288   /* but is not perhaps always true. */
1289   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1295 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1296 {
1297   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1298   int         ierr;
1299 
1300   PetscFunctionBegin;
1301   /* do nondiagonal part */
1302   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1303   /* send it on its way */
1304   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1305   /* do local part */
1306   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1307   /* receive remote parts: note this assumes the values are not actually */
1308   /* inserted in yy until the next line, which is true for my implementation*/
1309   /* but is not perhaps always true. */
1310   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 /*
1315   This only works correctly for square matrices where the subblock A->A is the
1316    diagonal block
1317 */
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1320 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1321 {
1322   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1323   int         ierr;
1324 
1325   PetscFunctionBegin;
1326   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1327   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatScale_MPIBAIJ"
1333 int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1334 {
1335   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1336   int         ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1340   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1346 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1347 {
1348   Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
1349   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1350   int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1351   int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1352   int          *cmap,*idx_p,cstart = mat->cstart;
1353 
1354   PetscFunctionBegin;
1355   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1356   mat->getrowactive = PETSC_TRUE;
1357 
1358   if (!mat->rowvalues && (idx || v)) {
1359     /*
1360         allocate enough space to hold information from the longest row.
1361     */
1362     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1363     int     max = 1,mbs = mat->mbs,tmp;
1364     for (i=0; i<mbs; i++) {
1365       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1366       if (max < tmp) { max = tmp; }
1367     }
1368     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1369     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1370   }
1371 
1372   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1373   lrow = row - brstart;
1374 
1375   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1376   if (!v)   {pvA = 0; pvB = 0;}
1377   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1378   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1379   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1380   nztot = nzA + nzB;
1381 
1382   cmap  = mat->garray;
1383   if (v  || idx) {
1384     if (nztot) {
1385       /* Sort by increasing column numbers, assuming A and B already sorted */
1386       int imark = -1;
1387       if (v) {
1388         *v = v_p = mat->rowvalues;
1389         for (i=0; i<nzB; i++) {
1390           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1391           else break;
1392         }
1393         imark = i;
1394         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1395         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1396       }
1397       if (idx) {
1398         *idx = idx_p = mat->rowindices;
1399         if (imark > -1) {
1400           for (i=0; i<imark; i++) {
1401             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1402           }
1403         } else {
1404           for (i=0; i<nzB; i++) {
1405             if (cmap[cworkB[i]/bs] < cstart)
1406               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1407             else break;
1408           }
1409           imark = i;
1410         }
1411         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1412         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1413       }
1414     } else {
1415       if (idx) *idx = 0;
1416       if (v)   *v   = 0;
1417     }
1418   }
1419   *nz = nztot;
1420   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1421   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1427 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1428 {
1429   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1430 
1431   PetscFunctionBegin;
1432   if (baij->getrowactive == PETSC_FALSE) {
1433     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1434   }
1435   baij->getrowactive = PETSC_FALSE;
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1441 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1442 {
1443   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1444 
1445   PetscFunctionBegin;
1446   *bs = baij->bs;
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1452 int MatZeroEntries_MPIBAIJ(Mat A)
1453 {
1454   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1455   int         ierr;
1456 
1457   PetscFunctionBegin;
1458   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1459   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1465 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1466 {
1467   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1468   Mat         A = a->A,B = a->B;
1469   int         ierr;
1470   PetscReal   isend[5],irecv[5];
1471 
1472   PetscFunctionBegin;
1473   info->block_size     = (PetscReal)a->bs;
1474   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1475   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1476   isend[3] = info->memory;  isend[4] = info->mallocs;
1477   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1478   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1479   isend[3] += info->memory;  isend[4] += info->mallocs;
1480   if (flag == MAT_LOCAL) {
1481     info->nz_used      = isend[0];
1482     info->nz_allocated = isend[1];
1483     info->nz_unneeded  = isend[2];
1484     info->memory       = isend[3];
1485     info->mallocs      = isend[4];
1486   } else if (flag == MAT_GLOBAL_MAX) {
1487     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1488     info->nz_used      = irecv[0];
1489     info->nz_allocated = irecv[1];
1490     info->nz_unneeded  = irecv[2];
1491     info->memory       = irecv[3];
1492     info->mallocs      = irecv[4];
1493   } else if (flag == MAT_GLOBAL_SUM) {
1494     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1495     info->nz_used      = irecv[0];
1496     info->nz_allocated = irecv[1];
1497     info->nz_unneeded  = irecv[2];
1498     info->memory       = irecv[3];
1499     info->mallocs      = irecv[4];
1500   } else {
1501     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1502   }
1503   info->rows_global       = (PetscReal)A->M;
1504   info->columns_global    = (PetscReal)A->N;
1505   info->rows_local        = (PetscReal)A->m;
1506   info->columns_local     = (PetscReal)A->N;
1507   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1508   info->fill_ratio_needed = 0;
1509   info->factor_mallocs    = 0;
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1515 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1516 {
1517   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1518   int         ierr;
1519 
1520   PetscFunctionBegin;
1521   switch (op) {
1522   case MAT_NO_NEW_NONZERO_LOCATIONS:
1523   case MAT_YES_NEW_NONZERO_LOCATIONS:
1524   case MAT_COLUMNS_UNSORTED:
1525   case MAT_COLUMNS_SORTED:
1526   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1527   case MAT_KEEP_ZEROED_ROWS:
1528   case MAT_NEW_NONZERO_LOCATION_ERR:
1529     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1530     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1531     break;
1532   case MAT_ROW_ORIENTED:
1533     a->roworiented = PETSC_TRUE;
1534     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1535     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1536     break;
1537   case MAT_ROWS_SORTED:
1538   case MAT_ROWS_UNSORTED:
1539   case MAT_YES_NEW_DIAGONALS:
1540     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1541     break;
1542   case MAT_COLUMN_ORIENTED:
1543     a->roworiented = PETSC_FALSE;
1544     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1545     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1546     break;
1547   case MAT_IGNORE_OFF_PROC_ENTRIES:
1548     a->donotstash = PETSC_TRUE;
1549     break;
1550   case MAT_NO_NEW_DIAGONALS:
1551     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1552   case MAT_USE_HASH_TABLE:
1553     a->ht_flag = PETSC_TRUE;
1554     break;
1555   case MAT_SYMMETRIC:
1556   case MAT_STRUCTURALLY_SYMMETRIC:
1557   case MAT_NOT_SYMMETRIC:
1558   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1559   case MAT_HERMITIAN:
1560   case MAT_NOT_HERMITIAN:
1561   case MAT_SYMMETRY_ETERNAL:
1562   case MAT_NOT_SYMMETRY_ETERNAL:
1563     break;
1564   default:
1565     SETERRQ(PETSC_ERR_SUP,"unknown option");
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1572 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1573 {
1574   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1575   Mat_SeqBAIJ *Aloc;
1576   Mat         B;
1577   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1578   int         bs=baij->bs,mbs=baij->mbs;
1579   MatScalar   *a;
1580 
1581   PetscFunctionBegin;
1582   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1583   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1584 
1585   /* copy over the A part */
1586   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1587   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1588   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1589 
1590   for (i=0; i<mbs; i++) {
1591     rvals[0] = bs*(baij->rstart + i);
1592     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1593     for (j=ai[i]; j<ai[i+1]; j++) {
1594       col = (baij->cstart+aj[j])*bs;
1595       for (k=0; k<bs; k++) {
1596         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1597         col++; a += bs;
1598       }
1599     }
1600   }
1601   /* copy over the B part */
1602   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1603   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1604   for (i=0; i<mbs; i++) {
1605     rvals[0] = bs*(baij->rstart + i);
1606     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1607     for (j=ai[i]; j<ai[i+1]; j++) {
1608       col = baij->garray[aj[j]]*bs;
1609       for (k=0; k<bs; k++) {
1610         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1611         col++; a += bs;
1612       }
1613     }
1614   }
1615   ierr = PetscFree(rvals);CHKERRQ(ierr);
1616   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1617   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1618 
1619   if (matout) {
1620     *matout = B;
1621   } else {
1622     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626 
1627 #undef __FUNCT__
1628 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1629 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1630 {
1631   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1632   Mat         a = baij->A,b = baij->B;
1633   int         ierr,s1,s2,s3;
1634 
1635   PetscFunctionBegin;
1636   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1637   if (rr) {
1638     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1639     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1640     /* Overlap communication with computation. */
1641     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1642   }
1643   if (ll) {
1644     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1645     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1646     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1647   }
1648   /* scale  the diagonal block */
1649   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1650 
1651   if (rr) {
1652     /* Do a scatter end and then right scale the off-diagonal block */
1653     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1654     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1655   }
1656 
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1662 int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1663 {
1664   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1665   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1666   int            *nprocs,j,idx,nsends,row;
1667   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1668   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1669   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1670   MPI_Comm       comm = A->comm;
1671   MPI_Request    *send_waits,*recv_waits;
1672   MPI_Status     recv_status,*send_status;
1673   IS             istmp;
1674   PetscTruth     found;
1675 
1676   PetscFunctionBegin;
1677   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1678   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1679 
1680   /*  first count number of contributors to each processor */
1681   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1682   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1683   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1684   for (i=0; i<N; i++) {
1685     idx   = rows[i];
1686     found = PETSC_FALSE;
1687     for (j=0; j<size; j++) {
1688       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1689         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1690       }
1691     }
1692     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1693   }
1694   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1695 
1696   /* inform other processors of number of messages and max length*/
1697   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1698 
1699   /* post receives:   */
1700   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1701   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1702   for (i=0; i<nrecvs; i++) {
1703     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1704   }
1705 
1706   /* do sends:
1707      1) starts[i] gives the starting index in svalues for stuff going to
1708      the ith processor
1709   */
1710   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1711   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1712   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1713   starts[0]  = 0;
1714   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1715   for (i=0; i<N; i++) {
1716     svalues[starts[owner[i]]++] = rows[i];
1717   }
1718   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1719 
1720   starts[0] = 0;
1721   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1722   count = 0;
1723   for (i=0; i<size; i++) {
1724     if (nprocs[2*i+1]) {
1725       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1726     }
1727   }
1728   ierr = PetscFree(starts);CHKERRQ(ierr);
1729 
1730   base = owners[rank]*bs;
1731 
1732   /*  wait on receives */
1733   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1734   source = lens + nrecvs;
1735   count  = nrecvs; slen = 0;
1736   while (count) {
1737     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1738     /* unpack receives into our local space */
1739     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1740     source[imdex]  = recv_status.MPI_SOURCE;
1741     lens[imdex]    = n;
1742     slen          += n;
1743     count--;
1744   }
1745   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1746 
1747   /* move the data into the send scatter */
1748   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1749   count = 0;
1750   for (i=0; i<nrecvs; i++) {
1751     values = rvalues + i*nmax;
1752     for (j=0; j<lens[i]; j++) {
1753       lrows[count++] = values[j] - base;
1754     }
1755   }
1756   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1757   ierr = PetscFree(lens);CHKERRQ(ierr);
1758   ierr = PetscFree(owner);CHKERRQ(ierr);
1759   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1760 
1761   /* actually zap the local rows */
1762   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1763   PetscLogObjectParent(A,istmp);
1764 
1765   /*
1766         Zero the required rows. If the "diagonal block" of the matrix
1767      is square and the user wishes to set the diagonal we use seperate
1768      code so that MatSetValues() is not called for each diagonal allocating
1769      new memory, thus calling lots of mallocs and slowing things down.
1770 
1771        Contributed by: Mathew Knepley
1772   */
1773   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1774   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1775   if (diag && (l->A->M == l->A->N)) {
1776     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1777   } else if (diag) {
1778     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1779     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1780       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1781 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1782     }
1783     for (i=0; i<slen; i++) {
1784       row  = lrows[i] + rstart_bs;
1785       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1786     }
1787     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1788     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1789   } else {
1790     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1791   }
1792 
1793   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1794   ierr = PetscFree(lrows);CHKERRQ(ierr);
1795 
1796   /* wait on sends */
1797   if (nsends) {
1798     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1799     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1800     ierr = PetscFree(send_status);CHKERRQ(ierr);
1801   }
1802   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1803   ierr = PetscFree(svalues);CHKERRQ(ierr);
1804 
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1810 int MatPrintHelp_MPIBAIJ(Mat A)
1811 {
1812   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1813   MPI_Comm    comm = A->comm;
1814   static int  called = 0;
1815   int         ierr;
1816 
1817   PetscFunctionBegin;
1818   if (!a->rank) {
1819     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1820   }
1821   if (called) {PetscFunctionReturn(0);} else called = 1;
1822   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1823   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1829 int MatSetUnfactored_MPIBAIJ(Mat A)
1830 {
1831   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1832   int         ierr;
1833 
1834   PetscFunctionBegin;
1835   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1840 
1841 #undef __FUNCT__
1842 #define __FUNCT__ "MatEqual_MPIBAIJ"
1843 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1844 {
1845   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1846   Mat         a,b,c,d;
1847   PetscTruth  flg;
1848   int         ierr;
1849 
1850   PetscFunctionBegin;
1851   a = matA->A; b = matA->B;
1852   c = matB->A; d = matB->B;
1853 
1854   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1855   if (flg == PETSC_TRUE) {
1856     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1857   }
1858   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1865 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1866 {
1867   int        ierr;
1868 
1869   PetscFunctionBegin;
1870   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 /* -------------------------------------------------------------------*/
1875 static struct _MatOps MatOps_Values = {
1876        MatSetValues_MPIBAIJ,
1877        MatGetRow_MPIBAIJ,
1878        MatRestoreRow_MPIBAIJ,
1879        MatMult_MPIBAIJ,
1880 /* 4*/ MatMultAdd_MPIBAIJ,
1881        MatMultTranspose_MPIBAIJ,
1882        MatMultTransposeAdd_MPIBAIJ,
1883        0,
1884        0,
1885        0,
1886 /*10*/ 0,
1887        0,
1888        0,
1889        0,
1890        MatTranspose_MPIBAIJ,
1891 /*15*/ MatGetInfo_MPIBAIJ,
1892        MatEqual_MPIBAIJ,
1893        MatGetDiagonal_MPIBAIJ,
1894        MatDiagonalScale_MPIBAIJ,
1895        MatNorm_MPIBAIJ,
1896 /*20*/ MatAssemblyBegin_MPIBAIJ,
1897        MatAssemblyEnd_MPIBAIJ,
1898        0,
1899        MatSetOption_MPIBAIJ,
1900        MatZeroEntries_MPIBAIJ,
1901 /*25*/ MatZeroRows_MPIBAIJ,
1902        0,
1903        0,
1904        0,
1905        0,
1906 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1907        0,
1908        0,
1909        0,
1910        0,
1911 /*35*/ MatDuplicate_MPIBAIJ,
1912        0,
1913        0,
1914        0,
1915        0,
1916 /*40*/ 0,
1917        MatGetSubMatrices_MPIBAIJ,
1918        MatIncreaseOverlap_MPIBAIJ,
1919        MatGetValues_MPIBAIJ,
1920        0,
1921 /*45*/ MatPrintHelp_MPIBAIJ,
1922        MatScale_MPIBAIJ,
1923        0,
1924        0,
1925        0,
1926 /*50*/ MatGetBlockSize_MPIBAIJ,
1927        0,
1928        0,
1929        0,
1930        0,
1931 /*55*/ 0,
1932        0,
1933        MatSetUnfactored_MPIBAIJ,
1934        0,
1935        MatSetValuesBlocked_MPIBAIJ,
1936 /*60*/ 0,
1937        MatDestroy_MPIBAIJ,
1938        MatView_MPIBAIJ,
1939        MatGetPetscMaps_Petsc,
1940        0,
1941 /*65*/ 0,
1942        0,
1943        0,
1944        0,
1945        0,
1946 /*70*/ MatGetRowMax_MPIBAIJ,
1947        0,
1948        0,
1949        0,
1950        0,
1951 /*75*/ 0,
1952        0,
1953        0,
1954        0,
1955        0,
1956 /*80*/ 0,
1957        0,
1958        0,
1959        0,
1960 /*85*/ MatLoad_MPIBAIJ
1961 };
1962 
1963 
1964 EXTERN_C_BEGIN
1965 #undef __FUNCT__
1966 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1967 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1968 {
1969   PetscFunctionBegin;
1970   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1971   *iscopy = PETSC_FALSE;
1972   PetscFunctionReturn(0);
1973 }
1974 EXTERN_C_END
1975 
1976 EXTERN_C_BEGIN
1977 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
1978 EXTERN_C_END
1979 
1980 EXTERN_C_BEGIN
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1983 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1984 {
1985   Mat_MPIBAIJ  *b;
1986   int          ierr,i;
1987 
1988   PetscFunctionBegin;
1989   B->preallocated = PETSC_TRUE;
1990   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1991 
1992   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1993   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1994   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1995   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1996   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1997   if (d_nnz) {
1998   for (i=0; i<B->m/bs; i++) {
1999       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2000     }
2001   }
2002   if (o_nnz) {
2003     for (i=0; i<B->m/bs; i++) {
2004       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2005     }
2006   }
2007 
2008   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2009   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2010   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2011   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2012 
2013   b = (Mat_MPIBAIJ*)B->data;
2014   b->bs  = bs;
2015   b->bs2 = bs*bs;
2016   b->mbs = B->m/bs;
2017   b->nbs = B->n/bs;
2018   b->Mbs = B->M/bs;
2019   b->Nbs = B->N/bs;
2020 
2021   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2022   b->rowners[0]    = 0;
2023   for (i=2; i<=b->size; i++) {
2024     b->rowners[i] += b->rowners[i-1];
2025   }
2026   b->rstart    = b->rowners[b->rank];
2027   b->rend      = b->rowners[b->rank+1];
2028 
2029   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2030   b->cowners[0] = 0;
2031   for (i=2; i<=b->size; i++) {
2032     b->cowners[i] += b->cowners[i-1];
2033   }
2034   b->cstart    = b->cowners[b->rank];
2035   b->cend      = b->cowners[b->rank+1];
2036 
2037   for (i=0; i<=b->size; i++) {
2038     b->rowners_bs[i] = b->rowners[i]*bs;
2039   }
2040   b->rstart_bs = b->rstart*bs;
2041   b->rend_bs   = b->rend*bs;
2042   b->cstart_bs = b->cstart*bs;
2043   b->cend_bs   = b->cend*bs;
2044 
2045   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2046   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2047   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2048   PetscLogObjectParent(B,b->A);
2049   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2050   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2051   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2052   PetscLogObjectParent(B,b->B);
2053 
2054   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2055 
2056   PetscFunctionReturn(0);
2057 }
2058 EXTERN_C_END
2059 
2060 EXTERN_C_BEGIN
2061 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2062 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2063 EXTERN_C_END
2064 
2065 /*MC
2066    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2067 
2068    Options Database Keys:
2069 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2070 
2071   Level: beginner
2072 
2073 .seealso: MatCreateMPIBAIJ
2074 M*/
2075 
2076 EXTERN_C_BEGIN
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatCreate_MPIBAIJ"
2079 int MatCreate_MPIBAIJ(Mat B)
2080 {
2081   Mat_MPIBAIJ  *b;
2082   int          ierr;
2083   PetscTruth   flg;
2084 
2085   PetscFunctionBegin;
2086   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2087   B->data = (void*)b;
2088 
2089   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2090   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2091   B->mapping    = 0;
2092   B->factor     = 0;
2093   B->assembled  = PETSC_FALSE;
2094 
2095   B->insertmode = NOT_SET_VALUES;
2096   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2097   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2098 
2099   /* build local table of row and column ownerships */
2100   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
2101   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2102   b->cowners    = b->rowners + b->size + 2;
2103   b->rowners_bs = b->cowners + b->size + 2;
2104 
2105   /* build cache for off array entries formed */
2106   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2107   b->donotstash  = PETSC_FALSE;
2108   b->colmap      = PETSC_NULL;
2109   b->garray      = PETSC_NULL;
2110   b->roworiented = PETSC_TRUE;
2111 
2112 #if defined(PETSC_USE_MAT_SINGLE)
2113   /* stuff for MatSetValues_XXX in single precision */
2114   b->setvalueslen     = 0;
2115   b->setvaluescopy    = PETSC_NULL;
2116 #endif
2117 
2118   /* stuff used in block assembly */
2119   b->barray       = 0;
2120 
2121   /* stuff used for matrix vector multiply */
2122   b->lvec         = 0;
2123   b->Mvctx        = 0;
2124 
2125   /* stuff for MatGetRow() */
2126   b->rowindices   = 0;
2127   b->rowvalues    = 0;
2128   b->getrowactive = PETSC_FALSE;
2129 
2130   /* hash table stuff */
2131   b->ht           = 0;
2132   b->hd           = 0;
2133   b->ht_size      = 0;
2134   b->ht_flag      = PETSC_FALSE;
2135   b->ht_fact      = 0;
2136   b->ht_total_ct  = 0;
2137   b->ht_insert_ct = 0;
2138 
2139   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2140   if (flg) {
2141     PetscReal fact = 1.39;
2142     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2143     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2144     if (fact <= 1.0) fact = 1.39;
2145     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2146     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2147   }
2148   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2149                                      "MatStoreValues_MPIBAIJ",
2150                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2151   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2152                                      "MatRetrieveValues_MPIBAIJ",
2153                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2154   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2155                                      "MatGetDiagonalBlock_MPIBAIJ",
2156                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2157   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2158                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2159                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2160   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2161                                      "MatDiagonalScaleLocal_MPIBAIJ",
2162                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2163   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2164                                      "MatSetHashTableFactor_MPIBAIJ",
2165                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 EXTERN_C_END
2169 
2170 /*MC
2171    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2172 
2173    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2174    and MATMPIBAIJ otherwise.
2175 
2176    Options Database Keys:
2177 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2178 
2179   Level: beginner
2180 
2181 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2182 M*/
2183 
2184 EXTERN_C_BEGIN
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatCreate_BAIJ"
2187 int MatCreate_BAIJ(Mat A) {
2188   int ierr,size;
2189 
2190   PetscFunctionBegin;
2191   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2192   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2193   if (size == 1) {
2194     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2195   } else {
2196     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2197   }
2198   PetscFunctionReturn(0);
2199 }
2200 EXTERN_C_END
2201 
2202 #undef __FUNCT__
2203 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2204 /*@C
2205    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2206    (block compressed row).  For good matrix assembly performance
2207    the user should preallocate the matrix storage by setting the parameters
2208    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2209    performance can be increased by more than a factor of 50.
2210 
2211    Collective on Mat
2212 
2213    Input Parameters:
2214 +  A - the matrix
2215 .  bs   - size of blockk
2216 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2217            submatrix  (same for all local rows)
2218 .  d_nnz - array containing the number of block nonzeros in the various block rows
2219            of the in diagonal portion of the local (possibly different for each block
2220            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2221 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2222            submatrix (same for all local rows).
2223 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2224            off-diagonal portion of the local submatrix (possibly different for
2225            each block row) or PETSC_NULL.
2226 
2227    Output Parameter:
2228 
2229 
2230    Options Database Keys:
2231 .   -mat_no_unroll - uses code that does not unroll the loops in the
2232                      block calculations (much slower)
2233 .   -mat_block_size - size of the blocks to use
2234 
2235    Notes:
2236    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2237    than it must be used on all processors that share the object for that argument.
2238 
2239    Storage Information:
2240    For a square global matrix we define each processor's diagonal portion
2241    to be its local rows and the corresponding columns (a square submatrix);
2242    each processor's off-diagonal portion encompasses the remainder of the
2243    local matrix (a rectangular submatrix).
2244 
2245    The user can specify preallocated storage for the diagonal part of
2246    the local submatrix with either d_nz or d_nnz (not both).  Set
2247    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2248    memory allocation.  Likewise, specify preallocated storage for the
2249    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2250 
2251    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2252    the figure below we depict these three local rows and all columns (0-11).
2253 
2254 .vb
2255            0 1 2 3 4 5 6 7 8 9 10 11
2256           -------------------
2257    row 3  |  o o o d d d o o o o o o
2258    row 4  |  o o o d d d o o o o o o
2259    row 5  |  o o o d d d o o o o o o
2260           -------------------
2261 .ve
2262 
2263    Thus, any entries in the d locations are stored in the d (diagonal)
2264    submatrix, and any entries in the o locations are stored in the
2265    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2266    stored simply in the MATSEQBAIJ format for compressed row storage.
2267 
2268    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2269    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2270    In general, for PDE problems in which most nonzeros are near the diagonal,
2271    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2272    or you will get TERRIBLE performance; see the users' manual chapter on
2273    matrices.
2274 
2275    Level: intermediate
2276 
2277 .keywords: matrix, block, aij, compressed row, sparse, parallel
2278 
2279 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2280 @*/
2281 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2282 {
2283   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2284 
2285   PetscFunctionBegin;
2286   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2287   if (f) {
2288     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatCreateMPIBAIJ"
2295 /*@C
2296    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2297    (block compressed row).  For good matrix assembly performance
2298    the user should preallocate the matrix storage by setting the parameters
2299    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2300    performance can be increased by more than a factor of 50.
2301 
2302    Collective on MPI_Comm
2303 
2304    Input Parameters:
2305 +  comm - MPI communicator
2306 .  bs   - size of blockk
2307 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2308            This value should be the same as the local size used in creating the
2309            y vector for the matrix-vector product y = Ax.
2310 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2311            This value should be the same as the local size used in creating the
2312            x vector for the matrix-vector product y = Ax.
2313 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2314 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2315 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2316            submatrix  (same for all local rows)
2317 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2318            of the in diagonal portion of the local (possibly different for each block
2319            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2320 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2321            submatrix (same for all local rows).
2322 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2323            off-diagonal portion of the local submatrix (possibly different for
2324            each block row) or PETSC_NULL.
2325 
2326    Output Parameter:
2327 .  A - the matrix
2328 
2329    Options Database Keys:
2330 .   -mat_no_unroll - uses code that does not unroll the loops in the
2331                      block calculations (much slower)
2332 .   -mat_block_size - size of the blocks to use
2333 
2334    Notes:
2335    A nonzero block is any block that as 1 or more nonzeros in it
2336 
2337    The user MUST specify either the local or global matrix dimensions
2338    (possibly both).
2339 
2340    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2341    than it must be used on all processors that share the object for that argument.
2342 
2343    Storage Information:
2344    For a square global matrix we define each processor's diagonal portion
2345    to be its local rows and the corresponding columns (a square submatrix);
2346    each processor's off-diagonal portion encompasses the remainder of the
2347    local matrix (a rectangular submatrix).
2348 
2349    The user can specify preallocated storage for the diagonal part of
2350    the local submatrix with either d_nz or d_nnz (not both).  Set
2351    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2352    memory allocation.  Likewise, specify preallocated storage for the
2353    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2354 
2355    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2356    the figure below we depict these three local rows and all columns (0-11).
2357 
2358 .vb
2359            0 1 2 3 4 5 6 7 8 9 10 11
2360           -------------------
2361    row 3  |  o o o d d d o o o o o o
2362    row 4  |  o o o d d d o o o o o o
2363    row 5  |  o o o d d d o o o o o o
2364           -------------------
2365 .ve
2366 
2367    Thus, any entries in the d locations are stored in the d (diagonal)
2368    submatrix, and any entries in the o locations are stored in the
2369    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2370    stored simply in the MATSEQBAIJ format for compressed row storage.
2371 
2372    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2373    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2374    In general, for PDE problems in which most nonzeros are near the diagonal,
2375    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2376    or you will get TERRIBLE performance; see the users' manual chapter on
2377    matrices.
2378 
2379    Level: intermediate
2380 
2381 .keywords: matrix, block, aij, compressed row, sparse, parallel
2382 
2383 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2384 @*/
2385 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2386 {
2387   int ierr,size;
2388 
2389   PetscFunctionBegin;
2390   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2391   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2392   if (size > 1) {
2393     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2394     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2395   } else {
2396     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2397     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2398   }
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2404 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2405 {
2406   Mat         mat;
2407   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2408   int         ierr,len=0;
2409 
2410   PetscFunctionBegin;
2411   *newmat       = 0;
2412   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2413   ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
2414 
2415   ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2416   mat->factor       = matin->factor;
2417   mat->preallocated = PETSC_TRUE;
2418   mat->assembled    = PETSC_TRUE;
2419   mat->insertmode   = NOT_SET_VALUES;
2420 
2421   a      = (Mat_MPIBAIJ*)mat->data;
2422   a->bs  = oldmat->bs;
2423   a->bs2 = oldmat->bs2;
2424   a->mbs = oldmat->mbs;
2425   a->nbs = oldmat->nbs;
2426   a->Mbs = oldmat->Mbs;
2427   a->Nbs = oldmat->Nbs;
2428 
2429   a->rstart       = oldmat->rstart;
2430   a->rend         = oldmat->rend;
2431   a->cstart       = oldmat->cstart;
2432   a->cend         = oldmat->cend;
2433   a->size         = oldmat->size;
2434   a->rank         = oldmat->rank;
2435   a->donotstash   = oldmat->donotstash;
2436   a->roworiented  = oldmat->roworiented;
2437   a->rowindices   = 0;
2438   a->rowvalues    = 0;
2439   a->getrowactive = PETSC_FALSE;
2440   a->barray       = 0;
2441   a->rstart_bs    = oldmat->rstart_bs;
2442   a->rend_bs      = oldmat->rend_bs;
2443   a->cstart_bs    = oldmat->cstart_bs;
2444   a->cend_bs      = oldmat->cend_bs;
2445 
2446   /* hash table stuff */
2447   a->ht           = 0;
2448   a->hd           = 0;
2449   a->ht_size      = 0;
2450   a->ht_flag      = oldmat->ht_flag;
2451   a->ht_fact      = oldmat->ht_fact;
2452   a->ht_total_ct  = 0;
2453   a->ht_insert_ct = 0;
2454 
2455   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2456   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2457   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2458   if (oldmat->colmap) {
2459 #if defined (PETSC_USE_CTABLE)
2460   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2461 #else
2462   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2463   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2464   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2465 #endif
2466   } else a->colmap = 0;
2467 
2468   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2469     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2470     PetscLogObjectMemory(mat,len*sizeof(int));
2471     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2472   } else a->garray = 0;
2473 
2474   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2475   PetscLogObjectParent(mat,a->lvec);
2476   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2477   PetscLogObjectParent(mat,a->Mvctx);
2478 
2479   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2480   PetscLogObjectParent(mat,a->A);
2481   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2482   PetscLogObjectParent(mat,a->B);
2483   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2484   *newmat = mat;
2485 
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #include "petscsys.h"
2490 
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatLoad_MPIBAIJ"
2493 int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2494 {
2495   Mat          A;
2496   int          i,nz,ierr,j,rstart,rend,fd;
2497   PetscScalar  *vals,*buf;
2498   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2499   MPI_Status   status;
2500   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2501   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2502   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2503   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2504   int          dcount,kmax,k,nzcount,tmp;
2505 
2506   PetscFunctionBegin;
2507   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2508 
2509   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2510   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2511   if (!rank) {
2512     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2513     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2514     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2515     if (header[3] < 0) {
2516       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2517     }
2518   }
2519 
2520   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2521   M = header[1]; N = header[2];
2522 
2523   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2524 
2525   /*
2526      This code adds extra rows to make sure the number of rows is
2527      divisible by the blocksize
2528   */
2529   Mbs        = M/bs;
2530   extra_rows = bs - M + bs*(Mbs);
2531   if (extra_rows == bs) extra_rows = 0;
2532   else                  Mbs++;
2533   if (extra_rows &&!rank) {
2534     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2535   }
2536 
2537   /* determine ownership of all rows */
2538   mbs        = Mbs/size + ((Mbs % size) > rank);
2539   m          = mbs*bs;
2540   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2541   browners   = rowners + size + 1;
2542   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2543   rowners[0] = 0;
2544   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2545   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2546   rstart = rowners[rank];
2547   rend   = rowners[rank+1];
2548 
2549   /* distribute row lengths to all processors */
2550   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2551   if (!rank) {
2552     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2553     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2554     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2555     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2556     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2557     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2558     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2559   } else {
2560     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2561   }
2562 
2563   if (!rank) {
2564     /* calculate the number of nonzeros on each processor */
2565     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2566     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2567     for (i=0; i<size; i++) {
2568       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2569         procsnz[i] += rowlengths[j];
2570       }
2571     }
2572     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2573 
2574     /* determine max buffer needed and allocate it */
2575     maxnz = 0;
2576     for (i=0; i<size; i++) {
2577       maxnz = PetscMax(maxnz,procsnz[i]);
2578     }
2579     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2580 
2581     /* read in my part of the matrix column indices  */
2582     nz     = procsnz[0];
2583     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2584     mycols = ibuf;
2585     if (size == 1)  nz -= extra_rows;
2586     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2587     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2588 
2589     /* read in every ones (except the last) and ship off */
2590     for (i=1; i<size-1; i++) {
2591       nz   = procsnz[i];
2592       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2593       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2594     }
2595     /* read in the stuff for the last proc */
2596     if (size != 1) {
2597       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2598       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2599       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2600       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2601     }
2602     ierr = PetscFree(cols);CHKERRQ(ierr);
2603   } else {
2604     /* determine buffer space needed for message */
2605     nz = 0;
2606     for (i=0; i<m; i++) {
2607       nz += locrowlens[i];
2608     }
2609     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2610     mycols = ibuf;
2611     /* receive message of column indices*/
2612     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2613     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2614     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2615   }
2616 
2617   /* loop over local rows, determining number of off diagonal entries */
2618   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2619   odlens   = dlens + (rend-rstart);
2620   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2621   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2622   masked1  = mask    + Mbs;
2623   masked2  = masked1 + Mbs;
2624   rowcount = 0; nzcount = 0;
2625   for (i=0; i<mbs; i++) {
2626     dcount  = 0;
2627     odcount = 0;
2628     for (j=0; j<bs; j++) {
2629       kmax = locrowlens[rowcount];
2630       for (k=0; k<kmax; k++) {
2631         tmp = mycols[nzcount++]/bs;
2632         if (!mask[tmp]) {
2633           mask[tmp] = 1;
2634           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2635           else masked1[dcount++] = tmp;
2636         }
2637       }
2638       rowcount++;
2639     }
2640 
2641     dlens[i]  = dcount;
2642     odlens[i] = odcount;
2643 
2644     /* zero out the mask elements we set */
2645     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2646     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2647   }
2648 
2649   /* create our matrix */
2650   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2651   ierr = MatSetType(A,type);CHKERRQ(ierr)
2652   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2653 
2654   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2655   MatSetOption(A,MAT_COLUMNS_SORTED);
2656 
2657   if (!rank) {
2658     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2659     /* read in my part of the matrix numerical values  */
2660     nz = procsnz[0];
2661     vals = buf;
2662     mycols = ibuf;
2663     if (size == 1)  nz -= extra_rows;
2664     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2665     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2666 
2667     /* insert into matrix */
2668     jj      = rstart*bs;
2669     for (i=0; i<m; i++) {
2670       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2671       mycols += locrowlens[i];
2672       vals   += locrowlens[i];
2673       jj++;
2674     }
2675     /* read in other processors (except the last one) and ship out */
2676     for (i=1; i<size-1; i++) {
2677       nz   = procsnz[i];
2678       vals = buf;
2679       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2680       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2681     }
2682     /* the last proc */
2683     if (size != 1){
2684       nz   = procsnz[i] - extra_rows;
2685       vals = buf;
2686       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2687       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2688       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2689     }
2690     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2691   } else {
2692     /* receive numeric values */
2693     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2694 
2695     /* receive message of values*/
2696     vals   = buf;
2697     mycols = ibuf;
2698     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2699     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2700     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2701 
2702     /* insert into matrix */
2703     jj      = rstart*bs;
2704     for (i=0; i<m; i++) {
2705       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2706       mycols += locrowlens[i];
2707       vals   += locrowlens[i];
2708       jj++;
2709     }
2710   }
2711   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2712   ierr = PetscFree(buf);CHKERRQ(ierr);
2713   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2714   ierr = PetscFree(rowners);CHKERRQ(ierr);
2715   ierr = PetscFree(dlens);CHKERRQ(ierr);
2716   ierr = PetscFree(mask);CHKERRQ(ierr);
2717   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2718   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2719 
2720   *newmat = A;
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2726 /*@
2727    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2728 
2729    Input Parameters:
2730 .  mat  - the matrix
2731 .  fact - factor
2732 
2733    Collective on Mat
2734 
2735    Level: advanced
2736 
2737   Notes:
2738    This can also be set by the command line option: -mat_use_hash_table fact
2739 
2740 .keywords: matrix, hashtable, factor, HT
2741 
2742 .seealso: MatSetOption()
2743 @*/
2744 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2745 {
2746   int ierr,(*f)(Mat,PetscReal);
2747 
2748   PetscFunctionBegin;
2749   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2750   if (f) {
2751     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2752   }
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2758 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2759 {
2760   Mat_MPIBAIJ *baij;
2761 
2762   PetscFunctionBegin;
2763   baij = (Mat_MPIBAIJ*)mat->data;
2764   baij->ht_fact = fact;
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 #undef __FUNCT__
2769 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2770 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2771 {
2772   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2773   PetscFunctionBegin;
2774   *Ad     = a->A;
2775   *Ao     = a->B;
2776   *colmap = a->garray;
2777   PetscFunctionReturn(0);
2778 }
2779