xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 1d73ed98e2847f7abb40ffed08f57c10c487fa4f)
1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN int DisAssemble_MPIBAIJ(Mat);
7 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
8 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
10 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
11 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
12 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
14 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
15 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
16 
/*  UGLY, ugly, ugly
   When MatScalar != PetscScalar, the ..._MatScalar() routines below take matrix
   elements already in MatScalar (e.g. single) precision and insert them into the
   MatScalar data structures, while the corresponding public routines (e.g.
   MatSetValuesBlocked_MPIBAIJ()) first convert the incoming PetscScalar entries
   to MatScalar and then call the ..._MatScalar() version. When MatScalar ==
   PetscScalar no conversion is needed, so the ..._MatScalar() names are simply
   #defined to the regular functions and no separate conversion layer exists.
*/
24 #if defined(PETSC_USE_MAT_SINGLE)
25 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
26 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
29 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
30 #else
31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
32 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
34 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
36 #endif
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
41 {
42   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
43   int          ierr,i;
44   PetscScalar  *va,*vb;
45   Vec          vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 int MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
73   int         ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 int MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
88   int         ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length (unless the hash-table
   variant, PETSC_USE_CTABLE, is used).
*/
103 #undef __FUNCT__
104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  /* nbs = number of block columns actually present in the off-diagonal part */
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash-table variant: key is garray[i]+1 (1-based global block column),
     value is i*bs+1 (1-based local point-column offset); avoids the
     O(global size) array of the dense variant below */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: colmap[global block column] = local point offset + 1;
     entries left at 0 mean the column has no local storage (lookups later
     subtract 1, so "missing" becomes -1) */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
125 
126 #define CHUNKSIZE  10
127 
/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - inserts (or, for
   addv==ADD_VALUES, adds) one point entry into the "diagonal" sequential
   block a of the parallel matrix.  row/col are local point (not block)
   indices; the macro locates the containing bs x bs block by binary plus
   linear search of the block row.  If the block is absent and a->nonew
   permits insertion, the entire matrix storage is reallocated with
   CHUNKSIZE extra blocks of slack for this row.  Relies on variables
   declared in the enclosing function scope
   (a,A,aa,ai,aj,ailen,aimax,bs,bs2,ierr,rp,ap,bap,low,high,t,_i,ii,N,
   nrow,rmax,brow,bcol,ridx,cidx).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows to a small window, then scan linearly */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block already allocated: update entry (ridx,cidx) in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space: values, column indices, row pointers \
           packed into one allocation */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        /* zero the CHUNKSIZE new blocks; sized in MatScalar units to match \
           the allocation (was sizeof(PetscScalar), which over-zeroed into \
           the index arrays when MatScalar is smaller than PetscScalar) */ \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
204 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - inserts (or, for
   addv==ADD_VALUES, adds) one point entry into the "off-diagonal"
   sequential block b of the parallel matrix.  Mirrors
   MatSetValues_SeqBAIJ_A_Private() exactly, operating on the b/bi/bj/ba
   variables of the enclosing scope instead; if the target block is not
   present and b->nonew permits, all storage is reallocated with
   CHUNKSIZE extra blocks of slack for this row.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows to a small window, then scan linearly */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block already allocated: update entry (ridx,cidx) in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
280 
281 #if defined(PETSC_USE_MAT_SINGLE)
282 #undef __FUNCT__
283 #define __FUNCT__ "MatSetValues_MPIBAIJ"
284 int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
285 {
286   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
287   int         ierr,i,N = m*n;
288   MatScalar   *vsingle;
289 
290   PetscFunctionBegin;
291   if (N > b->setvalueslen) {
292     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
293     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
294     b->setvalueslen  = N;
295   }
296   vsingle = b->setvaluescopy;
297 
298   for (i=0; i<N; i++) {
299     vsingle[i] = v[i];
300   }
301   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
307 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
308 {
309   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
310   int         ierr,i,N = m*n*b->bs2;
311   MatScalar   *vsingle;
312 
313   PetscFunctionBegin;
314   if (N > b->setvalueslen) {
315     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
316     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
317     b->setvalueslen  = N;
318   }
319   vsingle = b->setvaluescopy;
320   for (i=0; i<N; i++) {
321     vsingle[i] = v[i];
322   }
323   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
329 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
330 {
331   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
332   int         ierr,i,N = m*n;
333   MatScalar   *vsingle;
334 
335   PetscFunctionBegin;
336   if (N > b->setvalueslen) {
337     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
338     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
339     b->setvalueslen  = N;
340   }
341   vsingle = b->setvaluescopy;
342   for (i=0; i<N; i++) {
343     vsingle[i] = v[i];
344   }
345   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
346   PetscFunctionReturn(0);
347 }
348 
349 #undef __FUNCT__
350 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
351 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
352 {
353   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
354   int         ierr,i,N = m*n*b->bs2;
355   MatScalar   *vsingle;
356 
357   PetscFunctionBegin;
358   if (N > b->setvalueslen) {
359     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
360     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
361     b->setvalueslen  = N;
362   }
363   vsingle = b->setvaluescopy;
364   for (i=0; i<N; i++) {
365     vsingle[i] = v[i];
366   }
367   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
368   PetscFunctionReturn(0);
369 }
370 #endif
371 
372 #undef __FUNCT__
373 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
374 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
375 {
376   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
377   MatScalar   value;
378   PetscTruth  roworiented = baij->roworiented;
379   int         ierr,i,j,row,col;
380   int         rstart_orig=baij->rstart_bs;
381   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
382   int         cend_orig=baij->cend_bs,bs=baij->bs;
383 
384   /* Some Variables required in the macro */
385   Mat         A = baij->A;
386   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
387   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
388   MatScalar   *aa=a->a;
389 
390   Mat         B = baij->B;
391   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
392   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
393   MatScalar   *ba=b->a;
394 
395   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
396   int         low,high,t,ridx,cidx,bs2=a->bs2;
397   MatScalar   *ap,*bap;
398 
399   PetscFunctionBegin;
400   for (i=0; i<m; i++) {
401     if (im[i] < 0) continue;
402 #if defined(PETSC_USE_BOPT_g)
403     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
404 #endif
405     if (im[i] >= rstart_orig && im[i] < rend_orig) {
406       row = im[i] - rstart_orig;
407       for (j=0; j<n; j++) {
408         if (in[j] >= cstart_orig && in[j] < cend_orig){
409           col = in[j] - cstart_orig;
410           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
412           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
413         } else if (in[j] < 0) continue;
414 #if defined(PETSC_USE_BOPT_g)
415         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
416 #endif
417         else {
418           if (mat->was_assembled) {
419             if (!baij->colmap) {
420               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
421             }
422 #if defined (PETSC_USE_CTABLE)
423             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
424             col  = col - 1;
425 #else
426             col = baij->colmap[in[j]/bs] - 1;
427 #endif
428             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
429               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
430               col =  in[j];
431               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
432               B = baij->B;
433               b = (Mat_SeqBAIJ*)(B)->data;
434               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
435               ba=b->a;
436             } else col += in[j]%bs;
437           } else col = in[j];
438           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
439           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
440           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
441         }
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNCT__
457 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - inserts/adds bs x bs blocks of
   values (already in MatScalar precision) using block row/column indices.
   v holds m*n blocks whose in-memory layout depends on baij->roworiented;
   each block is staged contiguously in baij->barray before being passed
   to the sequential SetValuesBlocked routines.  Off-process block rows
   are stashed for communication at assembly time.
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int             rend=baij->rend,cstart=baij->cstart,stepval;
  int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the persistent bs x bs staging buffer */
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/cols of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block rows are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {  /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided block into the contiguous staging buffer */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend){  /* diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {  /* off-diagonal block: translate global column via colmap */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* sanity check: colmap entries for block columns must be
               multiples of bs (plus the +1 offset) */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* unknown column: revert to unassembled state, use global index */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {  /* block row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
557 
558 #define HASH_KEY 0.6180339887
559 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562 #undef __FUNCT__
563 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - hash-table based insertion of point
   values.  Each existing bs x bs block has an entry in the hash table
   baij->ht (keyed by global block row/column), with baij->hd holding the
   address of the block's values; values are written directly through that
   address, so no new nonzero blocks can be created here - a missing hash
   entry is an error.  Off-process rows are stashed for assembly time.
   Presumably ht/hd are built during a prior assembly - confirm against
   the assembly routines.
*/
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  /* probe counters for hash-table performance diagnostics */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {  /* locally owned row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probe forward from h1, wrapping to 0 at table end */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* linear probe forward from h1, wrapping to 0 at table end */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {  /* row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
639 
640 #undef __FUNCT__
641 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - hash-table based insertion
   of whole bs x bs blocks using block row/column indices.  The hash table
   baij->ht maps each existing block (key row*Nbs+col+1) to an index whose
   value array address is stored in baij->hd; the block's values are
   copied/added there directly, so new nonzero blocks cannot be created -
   a missing hash entry is an error.  Off-process block rows are stashed
   for assembly time.
*/
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  int             ierr,i,j,ii,jj,row,col;
  int             rstart=baij->rstart ;
  int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  /* probe counters for hash-table performance diagnostics */
  int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride (in scalars) between consecutive rows/cols of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {  /* locally owned block row */
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probe forward from h1, wrapping to 0 at table end */
       if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#else
        /* linear probe forward from h1, wrapping to 0 at table end */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {  /* block row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - retrieves values at point indices idxm/idxn into
   v (row-major, v[i*n+j]).  Only rows owned by this process may be
   queried.  Columns in the diagonal range are read from A; other columns
   are translated through colmap and read from B, with entries not stored
   locally returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {  /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){  /* diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {  /* off-diagonal block: translate column via colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column not stored locally => entry is implicitly zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatNorm_MPIBAIJ"
802 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
803 {
804   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
805   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
806   int        ierr,i,bs2=baij->bs2;
807   PetscReal  sum = 0.0;
808   MatScalar  *v;
809 
810   PetscFunctionBegin;
811   if (baij->size == 1) {
812     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
813   } else {
814     if (type == NORM_FROBENIUS) {
815       v = amat->a;
816       for (i=0; i<amat->nz*bs2; i++) {
817 #if defined(PETSC_USE_COMPLEX)
818         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
819 #else
820         sum += (*v)*(*v); v++;
821 #endif
822       }
823       v = bmat->a;
824       for (i=0; i<bmat->nz*bs2; i++) {
825 #if defined(PETSC_USE_COMPLEX)
826         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
827 #else
828         sum += (*v)*(*v); v++;
829 #endif
830       }
831       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
832       *nrm = sqrt(*nrm);
833     } else {
834       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
835     }
836   }
837   PetscFunctionReturn(0);
838 }
839 
840 
841 /*
842   Creates the hash table, and sets the table
843   This table is created only once.
844   If new entried need to be added to the matrix
845   then the hash table has to be destroyed and
846   recreated.
847 */
848 #undef __FUNCT__
849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851 {
852   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
853   Mat         A = baij->A,B=baij->B;
854   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
857   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
858   int         *HT,key;
859   MatScalar   **HD;
860   PetscReal   tmp;
861 #if defined(PETSC_USE_BOPT_g)
862   int         ct=0,max=0;
863 #endif
864 
865   PetscFunctionBegin;
866   baij->ht_size=(int)(factor*nz);
867   size = baij->ht_size;
868 
869   if (baij->ht) {
870     PetscFunctionReturn(0);
871   }
872 
873   /* Allocate Memory for Hash Table */
874   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
875   baij->ht = (int*)(baij->hd + size);
876   HD       = baij->hd;
877   HT       = baij->ht;
878 
879 
880   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
881 
882 
883   /* Loop Over A */
884   for (i=0; i<a->mbs; i++) {
885     for (j=ai[i]; j<ai[i+1]; j++) {
886       row = i+rstart;
887       col = aj[j]+cstart;
888 
889       key = row*Nbs + col + 1;
890       h1  = HASH(size,key,tmp);
891       for (k=0; k<size; k++){
892         if (HT[(h1+k)%size] == 0.0) {
893           HT[(h1+k)%size] = key;
894           HD[(h1+k)%size] = a->a + j*bs2;
895           break;
896 #if defined(PETSC_USE_BOPT_g)
897         } else {
898           ct++;
899 #endif
900         }
901       }
902 #if defined(PETSC_USE_BOPT_g)
903       if (k> max) max = k;
904 #endif
905     }
906   }
907   /* Loop Over B */
908   for (i=0; i<b->mbs; i++) {
909     for (j=bi[i]; j<bi[i+1]; j++) {
910       row = i+rstart;
911       col = garray[bj[j]];
912       key = row*Nbs + col + 1;
913       h1  = HASH(size,key,tmp);
914       for (k=0; k<size; k++){
915         if (HT[(h1+k)%size] == 0.0) {
916           HT[(h1+k)%size] = key;
917           HD[(h1+k)%size] = b->a + j*bs2;
918           break;
919 #if defined(PETSC_USE_BOPT_g)
920         } else {
921           ct++;
922 #endif
923         }
924       }
925 #if defined(PETSC_USE_BOPT_g)
926       if (k> max) max = k;
927 #endif
928     }
929   }
930 
931   /* Print Summary */
932 #if defined(PETSC_USE_BOPT_g)
933   for (i=0,j=0; i<size; i++) {
934     if (HT[i]) {j++;}
935   }
936   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
937 #endif
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
943 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
944 {
945   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
946   int         ierr,nstash,reallocs;
947   InsertMode  addv;
948 
949   PetscFunctionBegin;
950   if (baij->donotstash) {
951     PetscFunctionReturn(0);
952   }
953 
954   /* make sure all processors are either in INSERTMODE or ADDMODE */
955   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
956   if (addv == (ADD_VALUES|INSERT_VALUES)) {
957     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
958   }
959   mat->insertmode = addv; /* in case this processor had no cache */
960 
961   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
962   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
963   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
964   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
965   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
966   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
967   PetscFunctionReturn(0);
968 }
969 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: receives the stashed
   off-processor entries (point stash, then block stash), assembles the
   two sequential blocks, participates in the collective disassembly
   check, and on final assembly sets up the scatter context and the
   optional hash-table MatSetValues kernels.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each received message is a list of (row,col,val) */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on final assembly, build the hash table if requested and switch to the
     hash-table-based insertion kernels */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow() work array is invalidated by assembly; release it */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
1068 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewer kernel for ASCII, draw, and socket viewers.  The ASCII INFO
   formats print only per-process summaries; every other case gathers the
   entire matrix onto the first processor and views it there sequentially.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* summary statistics only -- no matrix entries are printed */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* uniprocessor case: the diagonal block is the whole matrix */
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      /* process 0 owns all rows of the gathered copy; everyone else owns none */
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);   /* global point rows covered by block row i */
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* blocks are stored column-major: each k inserts one column of the block */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;   /* off-diagonal columns map through garray */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatView_MPIBAIJ"
1185 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1186 {
1187   int        ierr;
1188   PetscTruth isascii,isdraw,issocket,isbinary;
1189 
1190   PetscFunctionBegin;
1191   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1193   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1194   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1195   if (isascii || isdraw || issocket || isbinary) {
1196     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1197   } else {
1198     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1199   }
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1205 int MatDestroy_MPIBAIJ(Mat mat)
1206 {
1207   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1208   int         ierr;
1209 
1210   PetscFunctionBegin;
1211 #if defined(PETSC_USE_LOG)
1212   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
1213 #endif
1214   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1215   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1216   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1217   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1218   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1219 #if defined (PETSC_USE_CTABLE)
1220   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1221 #else
1222   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1223 #endif
1224   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1225   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1226   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1227   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1228   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1229   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1230 #if defined(PETSC_USE_MAT_SINGLE)
1231   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1232 #endif
1233   ierr = PetscFree(baij);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatMult_MPIBAIJ"
1239 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1240 {
1241   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1242   int         ierr,nt;
1243 
1244   PetscFunctionBegin;
1245   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1246   if (nt != A->n) {
1247     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1248   }
1249   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1250   if (nt != A->m) {
1251     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1252   }
1253   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1254   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1255   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1256   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1257   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1263 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1264 {
1265   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1266   int        ierr;
1267 
1268   PetscFunctionBegin;
1269   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1270   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1271   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1272   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1278 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1279 {
1280   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1281   int         ierr;
1282 
1283   PetscFunctionBegin;
1284   /* do nondiagonal part */
1285   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1286   /* send it on its way */
1287   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1288   /* do local part */
1289   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1290   /* receive remote parts: note this assumes the values are not actually */
1291   /* inserted in yy until the next line, which is true for my implementation*/
1292   /* but is not perhaps always true. */
1293   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1299 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1300 {
1301   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1302   int         ierr;
1303 
1304   PetscFunctionBegin;
1305   /* do nondiagonal part */
1306   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1307   /* send it on its way */
1308   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1309   /* do local part */
1310   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1311   /* receive remote parts: note this assumes the values are not actually */
1312   /* inserted in yy until the next line, which is true for my implementation*/
1313   /* but is not perhaps always true. */
1314   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 /*
1319   This only works correctly for square matrices where the subblock A->A is the
1320    diagonal block
1321 */
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1324 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1325 {
1326   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1327   int         ierr;
1328 
1329   PetscFunctionBegin;
1330   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1331   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatScale_MPIBAIJ"
1337 int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1338 {
1339   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1340   int         ierr;
1341 
1342   PetscFunctionBegin;
1343   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1344   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally owned row in global point
   indexing, merging the diagonal-block (A) and off-diagonal-block (B)
   entries into a single list sorted by increasing global column.
   Only rows owned by this processor may be requested.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds the values followed by the column indices */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  /* note: SETERRQ supplies its own statement, hence no trailing semicolon */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* request from the sequential blocks only the pieces the caller wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;   /* count of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all A entries, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so imark must be computed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A columns are local; shift them by the start of the diagonal block */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1431 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1432 {
1433   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1434 
1435   PetscFunctionBegin;
1436   if (baij->getrowactive == PETSC_FALSE) {
1437     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1438   }
1439   baij->getrowactive = PETSC_FALSE;
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1445 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1446 {
1447   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1448 
1449   PetscFunctionBegin;
1450   *bs = baij->bs;
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1456 int MatZeroEntries_MPIBAIJ(Mat A)
1457 {
1458   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1459   int         ierr;
1460 
1461   PetscFunctionBegin;
1462   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1463   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1469 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1470 {
1471   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1472   Mat         A = a->A,B = a->B;
1473   int         ierr;
1474   PetscReal   isend[5],irecv[5];
1475 
1476   PetscFunctionBegin;
1477   info->block_size     = (PetscReal)a->bs;
1478   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1479   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1480   isend[3] = info->memory;  isend[4] = info->mallocs;
1481   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1482   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1483   isend[3] += info->memory;  isend[4] += info->mallocs;
1484   if (flag == MAT_LOCAL) {
1485     info->nz_used      = isend[0];
1486     info->nz_allocated = isend[1];
1487     info->nz_unneeded  = isend[2];
1488     info->memory       = isend[3];
1489     info->mallocs      = isend[4];
1490   } else if (flag == MAT_GLOBAL_MAX) {
1491     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1492     info->nz_used      = irecv[0];
1493     info->nz_allocated = irecv[1];
1494     info->nz_unneeded  = irecv[2];
1495     info->memory       = irecv[3];
1496     info->mallocs      = irecv[4];
1497   } else if (flag == MAT_GLOBAL_SUM) {
1498     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1499     info->nz_used      = irecv[0];
1500     info->nz_allocated = irecv[1];
1501     info->nz_unneeded  = irecv[2];
1502     info->memory       = irecv[3];
1503     info->mallocs      = irecv[4];
1504   } else {
1505     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1506   }
1507   info->rows_global       = (PetscReal)A->M;
1508   info->columns_global    = (PetscReal)A->N;
1509   info->rows_local        = (PetscReal)A->m;
1510   info->columns_local     = (PetscReal)A->N;
1511   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1512   info->fill_ratio_needed = 0;
1513   info->factor_mallocs    = 0;
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1519 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1520 {
1521   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1522   int         ierr;
1523 
1524   PetscFunctionBegin;
1525   switch (op) {
1526   case MAT_NO_NEW_NONZERO_LOCATIONS:
1527   case MAT_YES_NEW_NONZERO_LOCATIONS:
1528   case MAT_COLUMNS_UNSORTED:
1529   case MAT_COLUMNS_SORTED:
1530   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1531   case MAT_KEEP_ZEROED_ROWS:
1532   case MAT_NEW_NONZERO_LOCATION_ERR:
1533     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1534     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1535     break;
1536   case MAT_ROW_ORIENTED:
1537     a->roworiented = PETSC_TRUE;
1538     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1539     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1540     break;
1541   case MAT_ROWS_SORTED:
1542   case MAT_ROWS_UNSORTED:
1543   case MAT_YES_NEW_DIAGONALS:
1544     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1545     break;
1546   case MAT_COLUMN_ORIENTED:
1547     a->roworiented = PETSC_FALSE;
1548     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1549     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1550     break;
1551   case MAT_IGNORE_OFF_PROC_ENTRIES:
1552     a->donotstash = PETSC_TRUE;
1553     break;
1554   case MAT_NO_NEW_DIAGONALS:
1555     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1556   case MAT_USE_HASH_TABLE:
1557     a->ht_flag = PETSC_TRUE;
1558     break;
1559   case MAT_SYMMETRIC:
1560   case MAT_STRUCTURALLY_SYMMETRIC:
1561   case MAT_NOT_SYMMETRIC:
1562   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1563   case MAT_HERMITIAN:
1564   case MAT_NOT_HERMITIAN:
1565   case MAT_SYMMETRY_ETERNAL:
1566   case MAT_NOT_SYMMETRY_ETERNAL:
1567     break;
1568   default:
1569     SETERRQ(PETSC_ERR_SUP,"unknown option");
1570   }
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1576 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1577 {
1578   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1579   Mat_SeqBAIJ *Aloc;
1580   Mat         B;
1581   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1582   int         bs=baij->bs,mbs=baij->mbs;
1583   MatScalar   *a;
1584 
1585   PetscFunctionBegin;
1586   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1587   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1588   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1589   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1590 
1591   /* copy over the A part */
1592   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1593   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1594   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1595 
1596   for (i=0; i<mbs; i++) {
1597     rvals[0] = bs*(baij->rstart + i);
1598     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1599     for (j=ai[i]; j<ai[i+1]; j++) {
1600       col = (baij->cstart+aj[j])*bs;
1601       for (k=0; k<bs; k++) {
1602         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1603         col++; a += bs;
1604       }
1605     }
1606   }
1607   /* copy over the B part */
1608   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1609   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1610   for (i=0; i<mbs; i++) {
1611     rvals[0] = bs*(baij->rstart + i);
1612     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1613     for (j=ai[i]; j<ai[i+1]; j++) {
1614       col = baij->garray[aj[j]]*bs;
1615       for (k=0; k<bs; k++) {
1616         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1617         col++; a += bs;
1618       }
1619     }
1620   }
1621   ierr = PetscFree(rvals);CHKERRQ(ierr);
1622   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1623   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1624 
1625   if (matout) {
1626     *matout = B;
1627   } else {
1628     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1635 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1636 {
1637   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1638   Mat         a = baij->A,b = baij->B;
1639   int         ierr,s1,s2,s3;
1640 
1641   PetscFunctionBegin;
1642   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1643   if (rr) {
1644     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1645     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1646     /* Overlap communication with computation. */
1647     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1648   }
1649   if (ll) {
1650     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1651     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1652     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1653   }
1654   /* scale  the diagonal block */
1655   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1656 
1657   if (rr) {
1658     /* Do a scatter end and then right scale the off-diagonal block */
1659     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1660     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1661   }
1662 
1663   PetscFunctionReturn(0);
1664 }
1665 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the global rows of A listed in is; when diag is
   non-null the diagonal entry of each zeroed row is set to *diag.

   The index set may name rows owned by any process, so each requested row is
   first routed to its owner with point-to-point messages; each process then
   zeros the rows it owns.  Collective on A.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for proc j; nprocs[2*j+1] = 1 if any row goes there */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      /* owners[] stores block-row boundaries; scale by bs to compare point rows */
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:  nmax is an upper bound on any single incoming message */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    /* pack each row index into the slot for its owning process */
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* starts[] was advanced during packing; rebuild it before posting sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first point row owned by this process; used to localize received indices */
  base = owners[rank]*bs;

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      /* convert global point-row index to local */
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert diagonal entries one
       at a time (may allocate new nonzeros, which 'nonew' forbids) */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues can be freed safely */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1816 int MatPrintHelp_MPIBAIJ(Mat A)
1817 {
1818   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1819   MPI_Comm    comm = A->comm;
1820   static int  called = 0;
1821   int         ierr;
1822 
1823   PetscFunctionBegin;
1824   if (!a->rank) {
1825     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1826   }
1827   if (called) {PetscFunctionReturn(0);} else called = 1;
1828   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1829   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1835 int MatSetUnfactored_MPIBAIJ(Mat A)
1836 {
1837   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1838   int         ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatEqual_MPIBAIJ"
1849 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1850 {
1851   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1852   Mat         a,b,c,d;
1853   PetscTruth  flg;
1854   int         ierr;
1855 
1856   PetscFunctionBegin;
1857   a = matA->A; b = matA->B;
1858   c = matB->A; d = matB->B;
1859 
1860   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1861   if (flg == PETSC_TRUE) {
1862     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1863   }
1864   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1871 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1872 {
1873   int        ierr;
1874 
1875   PetscFunctionBegin;
1876   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1877   PetscFunctionReturn(0);
1878 }
1879 
/* -------------------------------------------------------------------*/
/*
   Function table ("virtual methods") for the MATMPIBAIJ type.  The numbered
   comments give the slot index within struct _MatOps; a 0 entry means the
   operation is not implemented for this type.  Slot order must match the
   struct declaration exactly, so entries may not be reordered.
*/
static struct _MatOps MatOps_Values = {
       MatSetValues_MPIBAIJ,
       MatGetRow_MPIBAIJ,
       MatRestoreRow_MPIBAIJ,
       MatMult_MPIBAIJ,
/* 4*/ MatMultAdd_MPIBAIJ,
       MatMultTranspose_MPIBAIJ,
       MatMultTransposeAdd_MPIBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       0,
       MatTranspose_MPIBAIJ,
/*15*/ MatGetInfo_MPIBAIJ,
       MatEqual_MPIBAIJ,
       MatGetDiagonal_MPIBAIJ,
       MatDiagonalScale_MPIBAIJ,
       MatNorm_MPIBAIJ,
/*20*/ MatAssemblyBegin_MPIBAIJ,
       MatAssemblyEnd_MPIBAIJ,
       0,
       MatSetOption_MPIBAIJ,
       MatZeroEntries_MPIBAIJ,
/*25*/ MatZeroRows_MPIBAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPIBAIJ,
       MatIncreaseOverlap_MPIBAIJ,
       MatGetValues_MPIBAIJ,
       0,
/*45*/ MatPrintHelp_MPIBAIJ,
       MatScale_MPIBAIJ,
       0,
       0,
       0,
/*50*/ MatGetBlockSize_MPIBAIJ,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPIBAIJ,
       0,
       MatSetValuesBlocked_MPIBAIJ,
/*60*/ 0,
       MatDestroy_MPIBAIJ,
       MatView_MPIBAIJ,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPIBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*85*/ MatLoad_MPIBAIJ
};
1968 
1969 
1970 EXTERN_C_BEGIN
1971 #undef __FUNCT__
1972 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1973 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1974 {
1975   PetscFunctionBegin;
1976   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1977   *iscopy = PETSC_FALSE;
1978   PetscFunctionReturn(0);
1979 }
1980 EXTERN_C_END
1981 
1982 EXTERN_C_BEGIN
1983 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
1984 EXTERN_C_END
1985 
1986 EXTERN_C_BEGIN
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1989 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1990 {
1991   Mat_MPIBAIJ  *b;
1992   int          ierr,i;
1993 
1994   PetscFunctionBegin;
1995   B->preallocated = PETSC_TRUE;
1996   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1997 
1998   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1999   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2000   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2001   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2002   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2003   if (d_nnz) {
2004   for (i=0; i<B->m/bs; i++) {
2005       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2006     }
2007   }
2008   if (o_nnz) {
2009     for (i=0; i<B->m/bs; i++) {
2010       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2011     }
2012   }
2013 
2014   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2015   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2016   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2017   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2018 
2019   b = (Mat_MPIBAIJ*)B->data;
2020   b->bs  = bs;
2021   b->bs2 = bs*bs;
2022   b->mbs = B->m/bs;
2023   b->nbs = B->n/bs;
2024   b->Mbs = B->M/bs;
2025   b->Nbs = B->N/bs;
2026 
2027   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2028   b->rowners[0]    = 0;
2029   for (i=2; i<=b->size; i++) {
2030     b->rowners[i] += b->rowners[i-1];
2031   }
2032   b->rstart    = b->rowners[b->rank];
2033   b->rend      = b->rowners[b->rank+1];
2034 
2035   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2036   b->cowners[0] = 0;
2037   for (i=2; i<=b->size; i++) {
2038     b->cowners[i] += b->cowners[i-1];
2039   }
2040   b->cstart    = b->cowners[b->rank];
2041   b->cend      = b->cowners[b->rank+1];
2042 
2043   for (i=0; i<=b->size; i++) {
2044     b->rowners_bs[i] = b->rowners[i]*bs;
2045   }
2046   b->rstart_bs = b->rstart*bs;
2047   b->rend_bs   = b->rend*bs;
2048   b->cstart_bs = b->cstart*bs;
2049   b->cend_bs   = b->cend*bs;
2050 
2051   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2052   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2053   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2054   PetscLogObjectParent(B,b->A);
2055   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2056   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2057   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2058   PetscLogObjectParent(B,b->B);
2059 
2060   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2061 
2062   PetscFunctionReturn(0);
2063 }
2064 EXTERN_C_END
2065 
2066 EXTERN_C_BEGIN
2067 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2068 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2069 EXTERN_C_END
2070 
2071 /*MC
2072    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2073 
2074    Options Database Keys:
2075 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2076 
2077   Level: beginner
2078 
2079 .seealso: MatCreateMPIBAIJ
2080 M*/
2081 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Constructor for the MATMPIBAIJ type.  Allocates and
   zero-initializes the type-specific data, installs the function table,
   sets all defaults, and registers the type's composed functions.
   Preallocation (ownership ranges, A/B blocks) happens later in
   MatMPIBAIJSetPreallocation_MPIBAIJ().
*/
int MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ  *b;
  int          ierr;
  PetscTruth   flg;

  PetscFunctionBegin;
  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  /* install the MPIBAIJ virtual function table */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation carved into three arrays: rowners, cowners, rowners_bs */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* optionally enable hash-table-based assembly; factors <= 1.0 fall back to the default */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the composed (queryable) functions for this type */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2175 
2176 /*MC
2177    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2178 
2179    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2180    and MATMPIBAIJ otherwise.
2181 
2182    Options Database Keys:
2183 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2184 
2185   Level: beginner
2186 
2187 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2188 M*/
2189 
2190 EXTERN_C_BEGIN
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatCreate_BAIJ"
2193 int MatCreate_BAIJ(Mat A) {
2194   int ierr,size;
2195 
2196   PetscFunctionBegin;
2197   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2198   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2199   if (size == 1) {
2200     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2201   } else {
2202     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2203   }
2204   PetscFunctionReturn(0);
2205 }
2206 EXTERN_C_END
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2210 /*@C
2211    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2212    (block compressed row).  For good matrix assembly performance
2213    the user should preallocate the matrix storage by setting the parameters
2214    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2215    performance can be increased by more than a factor of 50.
2216 
2217    Collective on Mat
2218 
2219    Input Parameters:
2220 +  A - the matrix
.  bs   - size of block
2222 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2223            submatrix  (same for all local rows)
2224 .  d_nnz - array containing the number of block nonzeros in the various block rows
2225            of the in diagonal portion of the local (possibly different for each block
2226            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2227 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2228            submatrix (same for all local rows).
2229 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2230            off-diagonal portion of the local submatrix (possibly different for
2231            each block row) or PETSC_NULL.
2232 
2233    Output Parameter:
2234 
2235 
2236    Options Database Keys:
2237 .   -mat_no_unroll - uses code that does not unroll the loops in the
2238                      block calculations (much slower)
2239 .   -mat_block_size - size of the blocks to use
2240 
2241    Notes:
2242    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2244 
2245    Storage Information:
2246    For a square global matrix we define each processor's diagonal portion
2247    to be its local rows and the corresponding columns (a square submatrix);
2248    each processor's off-diagonal portion encompasses the remainder of the
2249    local matrix (a rectangular submatrix).
2250 
2251    The user can specify preallocated storage for the diagonal part of
2252    the local submatrix with either d_nz or d_nnz (not both).  Set
2253    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2254    memory allocation.  Likewise, specify preallocated storage for the
2255    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2256 
2257    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2258    the figure below we depict these three local rows and all columns (0-11).
2259 
2260 .vb
2261            0 1 2 3 4 5 6 7 8 9 10 11
2262           -------------------
2263    row 3  |  o o o d d d o o o o o o
2264    row 4  |  o o o d d d o o o o o o
2265    row 5  |  o o o d d d o o o o o o
2266           -------------------
2267 .ve
2268 
2269    Thus, any entries in the d locations are stored in the d (diagonal)
2270    submatrix, and any entries in the o locations are stored in the
2271    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2272    stored simply in the MATSEQBAIJ format for compressed row storage.
2273 
2274    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2275    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2276    In general, for PDE problems in which most nonzeros are near the diagonal,
2277    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2278    or you will get TERRIBLE performance; see the users' manual chapter on
2279    matrices.
2280 
2281    Level: intermediate
2282 
2283 .keywords: matrix, block, aij, compressed row, sparse, parallel
2284 
2285 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2286 @*/
2287 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2288 {
2289   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2290 
2291   PetscFunctionBegin;
2292   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2293   if (f) {
2294     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2295   }
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatCreateMPIBAIJ"
2301 /*@C
2302    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2303    (block compressed row).  For good matrix assembly performance
2304    the user should preallocate the matrix storage by setting the parameters
2305    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2306    performance can be increased by more than a factor of 50.
2307 
2308    Collective on MPI_Comm
2309 
2310    Input Parameters:
2311 +  comm - MPI communicator
.  bs   - size of block
2313 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2314            This value should be the same as the local size used in creating the
2315            y vector for the matrix-vector product y = Ax.
2316 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2317            This value should be the same as the local size used in creating the
2318            x vector for the matrix-vector product y = Ax.
2319 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2320 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2321 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2322            submatrix  (same for all local rows)
2323 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2324            of the in diagonal portion of the local (possibly different for each block
2325            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2326 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2327            submatrix (same for all local rows).
2328 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2329            off-diagonal portion of the local submatrix (possibly different for
2330            each block row) or PETSC_NULL.
2331 
2332    Output Parameter:
2333 .  A - the matrix
2334 
2335    Options Database Keys:
2336 .   -mat_no_unroll - uses code that does not unroll the loops in the
2337                      block calculations (much slower)
2338 .   -mat_block_size - size of the blocks to use
2339 
2340    Notes:
   A nonzero block is any block that has 1 or more nonzeros in it
2342 
2343    The user MUST specify either the local or global matrix dimensions
2344    (possibly both).
2345 
2346    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2348 
2349    Storage Information:
2350    For a square global matrix we define each processor's diagonal portion
2351    to be its local rows and the corresponding columns (a square submatrix);
2352    each processor's off-diagonal portion encompasses the remainder of the
2353    local matrix (a rectangular submatrix).
2354 
2355    The user can specify preallocated storage for the diagonal part of
2356    the local submatrix with either d_nz or d_nnz (not both).  Set
2357    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2358    memory allocation.  Likewise, specify preallocated storage for the
2359    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2360 
2361    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2362    the figure below we depict these three local rows and all columns (0-11).
2363 
2364 .vb
2365            0 1 2 3 4 5 6 7 8 9 10 11
2366           -------------------
2367    row 3  |  o o o d d d o o o o o o
2368    row 4  |  o o o d d d o o o o o o
2369    row 5  |  o o o d d d o o o o o o
2370           -------------------
2371 .ve
2372 
2373    Thus, any entries in the d locations are stored in the d (diagonal)
2374    submatrix, and any entries in the o locations are stored in the
2375    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2376    stored simply in the MATSEQBAIJ format for compressed row storage.
2377 
2378    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2379    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2380    In general, for PDE problems in which most nonzeros are near the diagonal,
2381    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2382    or you will get TERRIBLE performance; see the users' manual chapter on
2383    matrices.
2384 
2385    Level: intermediate
2386 
2387 .keywords: matrix, block, aij, compressed row, sparse, parallel
2388 
2389 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2390 @*/
2391 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2392 {
2393   int ierr,size;
2394 
2395   PetscFunctionBegin;
2396   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2397   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2398   if (size > 1) {
2399     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2400     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2401   } else {
2402     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2403     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2404   }
2405   PetscFunctionReturn(0);
2406 }
2407 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   Creates a new MPIBAIJ matrix with the same parallel layout, block size,
   and nonzero structure as matin.  Values are copied only if cpvalues is
   MAT_COPY_VALUES (forwarded to the MatDuplicate calls on the diagonal
   and off-diagonal sequential pieces at the bottom of this routine).
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);

  /* install the full MPIBAIJ function table and mark the copy as already
     assembled/preallocated so it is immediately usable */
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-size and block-dimension bookkeeping */
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* copy the ownership ranges (in block units) and per-process info;
     transient per-call state (rowindices, rowvalues, barray) starts empty */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff: keep the configuration (flag/factor) but not the
     table itself, which is rebuilt on demand */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  /* colmap translates global block columns to local off-diagonal columns;
     deep-copy it only if the original has one */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray lists the global block columns present in the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  /* duplicate the communication machinery used by MatMult and friends */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);

  /* duplicate the two sequential pieces (values copied per cpvalues) */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
2494 
2495 #include "petscsys.h"
2496 
2497 #undef __FUNCT__
2498 #define __FUNCT__ "MatLoad_MPIBAIJ"
2499 int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2500 {
2501   Mat          A;
2502   int          i,nz,ierr,j,rstart,rend,fd;
2503   PetscScalar  *vals,*buf;
2504   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2505   MPI_Status   status;
2506   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2507   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2508   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2509   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2510   int          dcount,kmax,k,nzcount,tmp;
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2514 
2515   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2516   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2517   if (!rank) {
2518     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2519     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2520     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2521     if (header[3] < 0) {
2522       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2523     }
2524   }
2525 
2526   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2527   M = header[1]; N = header[2];
2528 
2529   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2530 
2531   /*
2532      This code adds extra rows to make sure the number of rows is
2533      divisible by the blocksize
2534   */
2535   Mbs        = M/bs;
2536   extra_rows = bs - M + bs*(Mbs);
2537   if (extra_rows == bs) extra_rows = 0;
2538   else                  Mbs++;
2539   if (extra_rows &&!rank) {
2540     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2541   }
2542 
2543   /* determine ownership of all rows */
2544   mbs        = Mbs/size + ((Mbs % size) > rank);
2545   m          = mbs*bs;
2546   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2547   browners   = rowners + size + 1;
2548   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2549   rowners[0] = 0;
2550   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2551   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2552   rstart = rowners[rank];
2553   rend   = rowners[rank+1];
2554 
2555   /* distribute row lengths to all processors */
2556   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2557   if (!rank) {
2558     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2559     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2560     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2561     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2562     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2563     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2564     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2565   } else {
2566     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2567   }
2568 
2569   if (!rank) {
2570     /* calculate the number of nonzeros on each processor */
2571     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2572     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2573     for (i=0; i<size; i++) {
2574       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2575         procsnz[i] += rowlengths[j];
2576       }
2577     }
2578     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2579 
2580     /* determine max buffer needed and allocate it */
2581     maxnz = 0;
2582     for (i=0; i<size; i++) {
2583       maxnz = PetscMax(maxnz,procsnz[i]);
2584     }
2585     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2586 
2587     /* read in my part of the matrix column indices  */
2588     nz     = procsnz[0];
2589     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2590     mycols = ibuf;
2591     if (size == 1)  nz -= extra_rows;
2592     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2593     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2594 
2595     /* read in every ones (except the last) and ship off */
2596     for (i=1; i<size-1; i++) {
2597       nz   = procsnz[i];
2598       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2599       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2600     }
2601     /* read in the stuff for the last proc */
2602     if (size != 1) {
2603       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2604       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2605       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2606       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2607     }
2608     ierr = PetscFree(cols);CHKERRQ(ierr);
2609   } else {
2610     /* determine buffer space needed for message */
2611     nz = 0;
2612     for (i=0; i<m; i++) {
2613       nz += locrowlens[i];
2614     }
2615     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2616     mycols = ibuf;
2617     /* receive message of column indices*/
2618     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2619     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2620     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2621   }
2622 
2623   /* loop over local rows, determining number of off diagonal entries */
2624   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2625   odlens   = dlens + (rend-rstart);
2626   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2627   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2628   masked1  = mask    + Mbs;
2629   masked2  = masked1 + Mbs;
2630   rowcount = 0; nzcount = 0;
2631   for (i=0; i<mbs; i++) {
2632     dcount  = 0;
2633     odcount = 0;
2634     for (j=0; j<bs; j++) {
2635       kmax = locrowlens[rowcount];
2636       for (k=0; k<kmax; k++) {
2637         tmp = mycols[nzcount++]/bs;
2638         if (!mask[tmp]) {
2639           mask[tmp] = 1;
2640           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2641           else masked1[dcount++] = tmp;
2642         }
2643       }
2644       rowcount++;
2645     }
2646 
2647     dlens[i]  = dcount;
2648     odlens[i] = odcount;
2649 
2650     /* zero out the mask elements we set */
2651     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2652     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2653   }
2654 
2655   /* create our matrix */
2656   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2657   ierr = MatSetType(A,type);CHKERRQ(ierr)
2658   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2659 
2660   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2661   MatSetOption(A,MAT_COLUMNS_SORTED);
2662 
2663   if (!rank) {
2664     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2665     /* read in my part of the matrix numerical values  */
2666     nz = procsnz[0];
2667     vals = buf;
2668     mycols = ibuf;
2669     if (size == 1)  nz -= extra_rows;
2670     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2671     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2672 
2673     /* insert into matrix */
2674     jj      = rstart*bs;
2675     for (i=0; i<m; i++) {
2676       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2677       mycols += locrowlens[i];
2678       vals   += locrowlens[i];
2679       jj++;
2680     }
2681     /* read in other processors (except the last one) and ship out */
2682     for (i=1; i<size-1; i++) {
2683       nz   = procsnz[i];
2684       vals = buf;
2685       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2686       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2687     }
2688     /* the last proc */
2689     if (size != 1){
2690       nz   = procsnz[i] - extra_rows;
2691       vals = buf;
2692       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2693       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2694       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2695     }
2696     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2697   } else {
2698     /* receive numeric values */
2699     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2700 
2701     /* receive message of values*/
2702     vals   = buf;
2703     mycols = ibuf;
2704     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2705     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2706     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2707 
2708     /* insert into matrix */
2709     jj      = rstart*bs;
2710     for (i=0; i<m; i++) {
2711       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2712       mycols += locrowlens[i];
2713       vals   += locrowlens[i];
2714       jj++;
2715     }
2716   }
2717   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2718   ierr = PetscFree(buf);CHKERRQ(ierr);
2719   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2720   ierr = PetscFree(rowners);CHKERRQ(ierr);
2721   ierr = PetscFree(dlens);CHKERRQ(ierr);
2722   ierr = PetscFree(mask);CHKERRQ(ierr);
2723   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2724   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2725 
2726   *newmat = A;
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2732 /*@
2733    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2734 
   Input Parameters:
+  mat  - the matrix
-  fact - factor
2738 
2739    Collective on Mat
2740 
2741    Level: advanced
2742 
2743   Notes:
2744    This can also be set by the command line option: -mat_use_hash_table fact
2745 
2746 .keywords: matrix, hashtable, factor, HT
2747 
2748 .seealso: MatSetOption()
2749 @*/
2750 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2751 {
2752   int ierr,(*f)(Mat,PetscReal);
2753 
2754   PetscFunctionBegin;
2755   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2756   if (f) {
2757     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2758   }
2759   PetscFunctionReturn(0);
2760 }
2761 
2762 #undef __FUNCT__
2763 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2764 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2765 {
2766   Mat_MPIBAIJ *baij;
2767 
2768   PetscFunctionBegin;
2769   baij = (Mat_MPIBAIJ*)mat->data;
2770   baij->ht_fact = fact;
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2776 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2777 {
2778   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2779   PetscFunctionBegin;
2780   *Ad     = a->A;
2781   *Ao     = a->B;
2782   *colmap = a->garray;
2783   PetscFunctionReturn(0);
2784 }
2785