xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2fc52814d27bf1f4e71021c1c3ebb532b583ed60)
1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN int DisAssemble_MPIBAIJ(Mat);
8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
15 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22    converts the entries into single precision and then calls ..._MatScalar() to put them
23    into the single precision data structures.
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
31 #else
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
33 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
35 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
37 #endif
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
42 {
43   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
44   int          ierr,i;
45   PetscScalar  *va,*vb;
46   Vec          vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 int MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
74   int         ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 int MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
89   int         ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non scable way since the
102    length of colmap equals the global matrix length.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 static int CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110   int         nbs = B->nbs,i,bs=B->bs,ierr;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
119   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
120   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
126 
127 #define CHUNKSIZE  10
128 
129 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
130 { \
131  \
132     brow = row/bs;  \
133     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134     rmax = aimax[brow]; nrow = ailen[brow]; \
135       bcol = col/bs; \
136       ridx = row % bs; cidx = col % bs; \
137       low = 0; high = nrow; \
138       while (high-low > 3) { \
139         t = (low+high)/2; \
140         if (rp[t] > bcol) high = t; \
141         else              low  = t; \
142       } \
143       for (_i=low; _i<high; _i++) { \
144         if (rp[_i] > bcol) break; \
145         if (rp[_i] == bcol) { \
146           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147           if (addv == ADD_VALUES) *bap += value;  \
148           else                    *bap  = value;  \
149           goto a_noinsert; \
150         } \
151       } \
152       if (a->nonew == 1) goto a_noinsert; \
153       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
154       if (nrow >= rmax) { \
155         /* there is no extra room in row, therefore enlarge */ \
156         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
157         MatScalar *new_a; \
158  \
159         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
160  \
161         /* malloc new storage space */ \
162         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
163         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
164         new_j   = (int*)(new_a + bs2*new_nz); \
165         new_i   = new_j + new_nz; \
166  \
167         /* copy over old data into new slots */ \
168         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
169         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
171         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
172         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
173         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
175         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
176                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
177         /* free up old matrix storage */ \
178         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179         if (!a->singlemalloc) { \
180           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182         } \
183         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
184         a->singlemalloc = PETSC_TRUE; \
185  \
186         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
189         a->maxnz += bs2*CHUNKSIZE; \
190         a->reallocs++; \
191         a->nz++; \
192       } \
193       N = nrow++ - 1;  \
194       /* shift up all the later entries in this row */ \
195       for (ii=N; ii>=_i; ii--) { \
196         rp[ii+1] = rp[ii]; \
197         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198       } \
199       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
200       rp[_i]                      = bcol;  \
201       ap[bs2*_i + bs*cidx + ridx] = value;  \
202       a_noinsert:; \
203     ailen[brow] = nrow; \
204 }
205 
206 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207 { \
208     brow = row/bs;  \
209     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210     rmax = bimax[brow]; nrow = bilen[brow]; \
211       bcol = col/bs; \
212       ridx = row % bs; cidx = col % bs; \
213       low = 0; high = nrow; \
214       while (high-low > 3) { \
215         t = (low+high)/2; \
216         if (rp[t] > bcol) high = t; \
217         else              low  = t; \
218       } \
219       for (_i=low; _i<high; _i++) { \
220         if (rp[_i] > bcol) break; \
221         if (rp[_i] == bcol) { \
222           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223           if (addv == ADD_VALUES) *bap += value;  \
224           else                    *bap  = value;  \
225           goto b_noinsert; \
226         } \
227       } \
228       if (b->nonew == 1) goto b_noinsert; \
229       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
230       if (nrow >= rmax) { \
231         /* there is no extra room in row, therefore enlarge */ \
232         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
233         MatScalar *new_a; \
234  \
235         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
236  \
237         /* malloc new storage space */ \
238         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
239         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240         new_j   = (int*)(new_a + bs2*new_nz); \
241         new_i   = new_j + new_nz; \
242  \
243         /* copy over old data into new slots */ \
244         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
245         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
247         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
249         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
252                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253         /* free up old matrix storage */ \
254         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255         if (!b->singlemalloc) { \
256           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258         } \
259         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
260         b->singlemalloc = PETSC_TRUE; \
261  \
262         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
265         b->maxnz += bs2*CHUNKSIZE; \
266         b->reallocs++; \
267         b->nz++; \
268       } \
269       N = nrow++ - 1;  \
270       /* shift up all the later entries in this row */ \
271       for (ii=N; ii>=_i; ii--) { \
272         rp[ii+1] = rp[ii]; \
273         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274       } \
275       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276       rp[_i]                      = bcol;  \
277       ap[bs2*_i + bs*cidx + ridx] = value;  \
278       b_noinsert:; \
279     bilen[brow] = nrow; \
280 }
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288   int         ierr,i,N = m*n;
289   MatScalar   *vsingle;
290 
291   PetscFunctionBegin;
292   if (N > b->setvalueslen) {
293     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
294     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
295     b->setvalueslen  = N;
296   }
297   vsingle = b->setvaluescopy;
298 
299   for (i=0; i<N; i++) {
300     vsingle[i] = v[i];
301   }
302   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
309 {
310   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311   int         ierr,i,N = m*n*b->bs2;
312   MatScalar   *vsingle;
313 
314   PetscFunctionBegin;
315   if (N > b->setvalueslen) {
316     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
317     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
318     b->setvalueslen  = N;
319   }
320   vsingle = b->setvaluescopy;
321   for (i=0; i<N; i++) {
322     vsingle[i] = v[i];
323   }
324   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
331 {
332   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
333   int         ierr,i,N = m*n;
334   MatScalar   *vsingle;
335 
336   PetscFunctionBegin;
337   if (N > b->setvalueslen) {
338     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
339     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
340     b->setvalueslen  = N;
341   }
342   vsingle = b->setvaluescopy;
343   for (i=0; i<N; i++) {
344     vsingle[i] = v[i];
345   }
346   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
353 {
354   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
355   int         ierr,i,N = m*n*b->bs2;
356   MatScalar   *vsingle;
357 
358   PetscFunctionBegin;
359   if (N > b->setvalueslen) {
360     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
361     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
362     b->setvalueslen  = N;
363   }
364   vsingle = b->setvaluescopy;
365   for (i=0; i<N; i++) {
366     vsingle[i] = v[i];
367   }
368   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 #endif
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
376 {
377   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
378   MatScalar   value;
379   PetscTruth  roworiented = baij->roworiented;
380   int         ierr,i,j,row,col;
381   int         rstart_orig=baij->rstart_bs;
382   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
383   int         cend_orig=baij->cend_bs,bs=baij->bs;
384 
385   /* Some Variables required in the macro */
386   Mat         A = baij->A;
387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
388   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
389   MatScalar   *aa=a->a;
390 
391   Mat         B = baij->B;
392   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
393   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
394   MatScalar   *ba=b->a;
395 
396   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
397   int         low,high,t,ridx,cidx,bs2=a->bs2;
398   MatScalar   *ap,*bap;
399 
400   PetscFunctionBegin;
401   for (i=0; i<m; i++) {
402     if (im[i] < 0) continue;
403 #if defined(PETSC_USE_BOPT_g)
404     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
405 #endif
406     if (im[i] >= rstart_orig && im[i] < rend_orig) {
407       row = im[i] - rstart_orig;
408       for (j=0; j<n; j++) {
409         if (in[j] >= cstart_orig && in[j] < cend_orig){
410           col = in[j] - cstart_orig;
411           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
413           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
414         } else if (in[j] < 0) continue;
415 #if defined(PETSC_USE_BOPT_g)
416         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
417 #endif
418         else {
419           if (mat->was_assembled) {
420             if (!baij->colmap) {
421               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
422             }
423 #if defined (PETSC_USE_CTABLE)
424             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
425             col  = col - 1;
426 #else
427             col = baij->colmap[in[j]/bs] - 1;
428 #endif
429             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
430               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
431               col =  in[j];
432               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
433               B = baij->B;
434               b = (Mat_SeqBAIJ*)(B)->data;
435               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
436               ba=b->a;
437             } else col += in[j]%bs;
438           } else col = in[j];
439           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
440           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
441           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
442         }
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
459 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
460 {
461   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
462   const MatScalar *value;
463   MatScalar       *barray=baij->barray;
464   PetscTruth      roworiented = baij->roworiented;
465   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
466   int             rend=baij->rend,cstart=baij->cstart,stepval;
467   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
468 
469   PetscFunctionBegin;
470   if(!barray) {
471     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
472     baij->barray = barray;
473   }
474 
475   if (roworiented) {
476     stepval = (n-1)*bs;
477   } else {
478     stepval = (m-1)*bs;
479   }
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_BOPT_g)
483     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row = im[i] - rstart;
487       for (j=0; j<n; j++) {
488         /* If NumCol = 1 then a copy is not required */
489         if ((roworiented) && (n == 1)) {
490           barray = (MatScalar*)v + i*bs2;
491         } else if((!roworiented) && (m == 1)) {
492           barray = (MatScalar*)v + j*bs2;
493         } else { /* Here a copy is required */
494           if (roworiented) {
495             value = v + i*(stepval+bs)*bs + j*bs;
496           } else {
497             value = v + j*(stepval+bs)*bs + i*bs;
498           }
499           for (ii=0; ii<bs; ii++,value+=stepval) {
500             for (jj=0; jj<bs; jj++) {
501               *barray++  = *value++;
502             }
503           }
504           barray -=bs2;
505         }
506 
507         if (in[j] >= cstart && in[j] < cend){
508           col  = in[j] - cstart;
509           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
510         }
511         else if (in[j] < 0) continue;
512 #if defined(PETSC_USE_BOPT_g)
513         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
514 #endif
515         else {
516           if (mat->was_assembled) {
517             if (!baij->colmap) {
518               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
519             }
520 
521 #if defined(PETSC_USE_BOPT_g)
522 #if defined (PETSC_USE_CTABLE)
523             { int data;
524               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
525               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
526             }
527 #else
528             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
529 #endif
530 #endif
531 #if defined (PETSC_USE_CTABLE)
532 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
533             col  = (col - 1)/bs;
534 #else
535             col = (baij->colmap[in[j]] - 1)/bs;
536 #endif
537             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
538               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
539               col =  in[j];
540             }
541           }
542           else col = in[j];
543           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
544         }
545       }
546     } else {
547       if (!baij->donotstash) {
548         if (roworiented) {
549           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
550         } else {
551           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556   PetscFunctionReturn(0);
557 }
558 
559 #define HASH_KEY 0.6180339887
560 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
561 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
563 #undef __FUNCT__
564 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
565 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
566 {
567   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
568   PetscTruth  roworiented = baij->roworiented;
569   int         ierr,i,j,row,col;
570   int         rstart_orig=baij->rstart_bs;
571   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
572   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
573   PetscReal   tmp;
574   MatScalar   **HD = baij->hd,value;
575 #if defined(PETSC_USE_BOPT_g)
576   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
577 #endif
578 
579   PetscFunctionBegin;
580 
581   for (i=0; i<m; i++) {
582 #if defined(PETSC_USE_BOPT_g)
583     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
584     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
585 #endif
586       row = im[i];
587     if (row >= rstart_orig && row < rend_orig) {
588       for (j=0; j<n; j++) {
589         col = in[j];
590         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
591         /* Look up into the Hash Table */
592         key = (row/bs)*Nbs+(col/bs)+1;
593         h1  = HASH(size,key,tmp);
594 
595 
596         idx = h1;
597 #if defined(PETSC_USE_BOPT_g)
598         insert_ct++;
599         total_ct++;
600         if (HT[idx] != key) {
601           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
602           if (idx == size) {
603             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
604             if (idx == h1) {
605               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
606             }
607           }
608         }
609 #else
610         if (HT[idx] != key) {
611           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
612           if (idx == size) {
613             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
614             if (idx == h1) {
615               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
616             }
617           }
618         }
619 #endif
620         /* A HASH table entry is found, so insert the values at the correct address */
621         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
622         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
623       }
624     } else {
625       if (!baij->donotstash) {
626         if (roworiented) {
627           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
628         } else {
629           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
630         }
631       }
632     }
633   }
634 #if defined(PETSC_USE_BOPT_g)
635   baij->ht_total_ct = total_ct;
636   baij->ht_insert_ct = insert_ct;
637 #endif
638   PetscFunctionReturn(0);
639 }
640 
641 #undef __FUNCT__
642 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
643 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
644 {
645   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
646   PetscTruth      roworiented = baij->roworiented;
647   int             ierr,i,j,ii,jj,row,col;
648   int             rstart=baij->rstart ;
649   int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
650   int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
651   PetscReal       tmp;
652   MatScalar       **HD = baij->hd,*baij_a;
653   const MatScalar *v_t,*value;
654 #if defined(PETSC_USE_BOPT_g)
655   int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
656 #endif
657 
658   PetscFunctionBegin;
659 
660   if (roworiented) {
661     stepval = (n-1)*bs;
662   } else {
663     stepval = (m-1)*bs;
664   }
665   for (i=0; i<m; i++) {
666 #if defined(PETSC_USE_BOPT_g)
667     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
668     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
669 #endif
670     row   = im[i];
671     v_t   = v + i*bs2;
672     if (row >= rstart && row < rend) {
673       for (j=0; j<n; j++) {
674         col = in[j];
675 
676         /* Look up into the Hash Table */
677         key = row*Nbs+col+1;
678         h1  = HASH(size,key,tmp);
679 
680         idx = h1;
681 #if defined(PETSC_USE_BOPT_g)
682         total_ct++;
683         insert_ct++;
684        if (HT[idx] != key) {
685           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
686           if (idx == size) {
687             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
688             if (idx == h1) {
689               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
690             }
691           }
692         }
693 #else
694         if (HT[idx] != key) {
695           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
696           if (idx == size) {
697             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
698             if (idx == h1) {
699               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
700             }
701           }
702         }
703 #endif
704         baij_a = HD[idx];
705         if (roworiented) {
706           /*value = v + i*(stepval+bs)*bs + j*bs;*/
707           /* value = v + (i*(stepval+bs)+j)*bs; */
708           value = v_t;
709           v_t  += bs;
710           if (addv == ADD_VALUES) {
711             for (ii=0; ii<bs; ii++,value+=stepval) {
712               for (jj=ii; jj<bs2; jj+=bs) {
713                 baij_a[jj]  += *value++;
714               }
715             }
716           } else {
717             for (ii=0; ii<bs; ii++,value+=stepval) {
718               for (jj=ii; jj<bs2; jj+=bs) {
719                 baij_a[jj]  = *value++;
720               }
721             }
722           }
723         } else {
724           value = v + j*(stepval+bs)*bs + i*bs;
725           if (addv == ADD_VALUES) {
726             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
727               for (jj=0; jj<bs; jj++) {
728                 baij_a[jj]  += *value++;
729               }
730             }
731           } else {
732             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
733               for (jj=0; jj<bs; jj++) {
734                 baij_a[jj]  = *value++;
735               }
736             }
737           }
738         }
739       }
740     } else {
741       if (!baij->donotstash) {
742         if (roworiented) {
743           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
744         } else {
745           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
746         }
747       }
748     }
749   }
750 #if defined(PETSC_USE_BOPT_g)
751   baij->ht_total_ct = total_ct;
752   baij->ht_insert_ct = insert_ct;
753 #endif
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatGetValues_MPIBAIJ"
759 int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
760 {
761   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
762   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
763   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
764 
765   PetscFunctionBegin;
766   for (i=0; i<m; i++) {
767     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
768     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
769     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
770       row = idxm[i] - bsrstart;
771       for (j=0; j<n; j++) {
772         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
773         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
774         if (idxn[j] >= bscstart && idxn[j] < bscend){
775           col = idxn[j] - bscstart;
776           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
777         } else {
778           if (!baij->colmap) {
779             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
780           }
781 #if defined (PETSC_USE_CTABLE)
782           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
783           data --;
784 #else
785           data = baij->colmap[idxn[j]/bs]-1;
786 #endif
787           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
788           else {
789             col  = data + idxn[j]%bs;
790             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
791           }
792         }
793       }
794     } else {
795       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
796     }
797   }
798  PetscFunctionReturn(0);
799 }
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatNorm_MPIBAIJ"
803 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
804 {
805   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
806   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
807   int        ierr,i,bs2=baij->bs2;
808   PetscReal  sum = 0.0;
809   MatScalar  *v;
810 
811   PetscFunctionBegin;
812   if (baij->size == 1) {
813     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
814   } else {
815     if (type == NORM_FROBENIUS) {
816       v = amat->a;
817       for (i=0; i<amat->nz*bs2; i++) {
818 #if defined(PETSC_USE_COMPLEX)
819         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
820 #else
821         sum += (*v)*(*v); v++;
822 #endif
823       }
824       v = bmat->a;
825       for (i=0; i<bmat->nz*bs2; i++) {
826 #if defined(PETSC_USE_COMPLEX)
827         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
828 #else
829         sum += (*v)*(*v); v++;
830 #endif
831       }
832       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
833       *nrm = sqrt(*nrm);
834     } else {
835       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
836     }
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 
842 /*
843   Creates the hash table, and sets the table
844   This table is created only once.
845   If new entried need to be added to the matrix
846   then the hash table has to be destroyed and
847   recreated.
848 */
849 #undef __FUNCT__
850 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
851 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
852 {
853   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
854   Mat         A = baij->A,B=baij->B;
855   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
856   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
857   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
858   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
859   int         *HT,key;
860   MatScalar   **HD;
861   PetscReal   tmp;
862 #if defined(PETSC_USE_BOPT_g)
863   int         ct=0,max=0;
864 #endif
865 
866   PetscFunctionBegin;
867   baij->ht_size=(int)(factor*nz);
868   size = baij->ht_size;
869 
870   if (baij->ht) {
871     PetscFunctionReturn(0);
872   }
873 
874   /* Allocate Memory for Hash Table */
875   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
876   baij->ht = (int*)(baij->hd + size);
877   HD       = baij->hd;
878   HT       = baij->ht;
879 
880 
881   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
882 
883 
884   /* Loop Over A */
885   for (i=0; i<a->mbs; i++) {
886     for (j=ai[i]; j<ai[i+1]; j++) {
887       row = i+rstart;
888       col = aj[j]+cstart;
889 
890       key = row*Nbs + col + 1;
891       h1  = HASH(size,key,tmp);
892       for (k=0; k<size; k++){
893         if (HT[(h1+k)%size] == 0.0) {
894           HT[(h1+k)%size] = key;
895           HD[(h1+k)%size] = a->a + j*bs2;
896           break;
897 #if defined(PETSC_USE_BOPT_g)
898         } else {
899           ct++;
900 #endif
901         }
902       }
903 #if defined(PETSC_USE_BOPT_g)
904       if (k> max) max = k;
905 #endif
906     }
907   }
908   /* Loop Over B */
909   for (i=0; i<b->mbs; i++) {
910     for (j=bi[i]; j<bi[i+1]; j++) {
911       row = i+rstart;
912       col = garray[bj[j]];
913       key = row*Nbs + col + 1;
914       h1  = HASH(size,key,tmp);
915       for (k=0; k<size; k++){
916         if (HT[(h1+k)%size] == 0.0) {
917           HT[(h1+k)%size] = key;
918           HD[(h1+k)%size] = b->a + j*bs2;
919           break;
920 #if defined(PETSC_USE_BOPT_g)
921         } else {
922           ct++;
923 #endif
924         }
925       }
926 #if defined(PETSC_USE_BOPT_g)
927       if (k> max) max = k;
928 #endif
929     }
930   }
931 
932   /* Print Summary */
933 #if defined(PETSC_USE_BOPT_g)
934   for (i=0,j=0; i<size; i++) {
935     if (HT[i]) {j++;}
936   }
937   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
938 #endif
939   PetscFunctionReturn(0);
940 }
941 
942 #undef __FUNCT__
943 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
944 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
945 {
946   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
947   int         ierr,nstash,reallocs;
948   InsertMode  addv;
949 
950   PetscFunctionBegin;
951   if (baij->donotstash) {
952     PetscFunctionReturn(0);
953   }
954 
955   /* make sure all processors are either in INSERTMODE or ADDMODE */
956   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
957   if (addv == (ADD_VALUES|INSERT_VALUES)) {
958     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
959   }
960   mat->insertmode = addv; /* in case this processor had no cache */
961 
962   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
963   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
964   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
965   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
966   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
967   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
973 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
974 {
975   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
976   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
977   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
978   int         *row,*col,other_disassembled;
979   PetscTruth  r1,r2,r3;
980   MatScalar   *val;
981   InsertMode  addv = mat->insertmode;
982 
983   PetscFunctionBegin;
984   if (!baij->donotstash) {
985     while (1) {
986       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
987       if (!flg) break;
988 
989       for (i=0; i<n;) {
990         /* Now identify the consecutive vals belonging to the same row */
991         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
992         if (j < n) ncols = j-i;
993         else       ncols = n-i;
994         /* Now assemble all these values with a single function call */
995         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
996         i = j;
997       }
998     }
999     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1000     /* Now process the block-stash. Since the values are stashed column-oriented,
1001        set the roworiented flag to column oriented, and after MatSetValues()
1002        restore the original flags */
1003     r1 = baij->roworiented;
1004     r2 = a->roworiented;
1005     r3 = b->roworiented;
1006     baij->roworiented = PETSC_FALSE;
1007     a->roworiented    = PETSC_FALSE;
1008     b->roworiented    = PETSC_FALSE;
1009     while (1) {
1010       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1011       if (!flg) break;
1012 
1013       for (i=0; i<n;) {
1014         /* Now identify the consecutive vals belonging to the same row */
1015         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1016         if (j < n) ncols = j-i;
1017         else       ncols = n-i;
1018         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1019         i = j;
1020       }
1021     }
1022     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1023     baij->roworiented = r1;
1024     a->roworiented    = r2;
1025     b->roworiented    = r3;
1026   }
1027 
1028   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1029   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1030 
1031   /* determine if any processor has disassembled, if so we must
1032      also disassemble ourselfs, in order that we may reassemble. */
1033   /*
1034      if nonzero structure of submatrix B cannot change then we know that
1035      no processor disassembled thus we can skip this stuff
1036   */
1037   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1038     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1039     if (mat->was_assembled && !other_disassembled) {
1040       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1041     }
1042   }
1043 
1044   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1045     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1046   }
1047   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1048   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1049 
1050 #if defined(PETSC_USE_BOPT_g)
1051   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1052     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1053     baij->ht_total_ct  = 0;
1054     baij->ht_insert_ct = 0;
1055   }
1056 #endif
1057   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1058     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1059     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1060     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1061   }
1062 
1063   if (baij->rowvalues) {
1064     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1065     baij->rowvalues = 0;
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1072 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1073 {
1074   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1075   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
1076   PetscTruth        isascii,isdraw;
1077   PetscViewer       sviewer;
1078   PetscViewerFormat format;
1079 
1080   PetscFunctionBegin;
1081   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1082   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1083   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1084   if (isascii) {
1085     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1086     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1087       MatInfo info;
1088       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1089       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1090       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1091               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1092               baij->bs,(int)info.memory);CHKERRQ(ierr);
1093       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1094       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1095       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1096       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1097       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1098       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1099       PetscFunctionReturn(0);
1100     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1101       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1102       PetscFunctionReturn(0);
1103     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1104       PetscFunctionReturn(0);
1105     }
1106   }
1107 
1108   if (isdraw) {
1109     PetscDraw       draw;
1110     PetscTruth isnull;
1111     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1112     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1113   }
1114 
1115   if (size == 1) {
1116     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1117     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1118   } else {
1119     /* assemble the entire matrix onto first processor. */
1120     Mat         A;
1121     Mat_SeqBAIJ *Aloc;
1122     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1123     MatScalar   *a;
1124 
1125     if (!rank) {
1126       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1127     } else {
1128       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1129     }
1130     PetscLogObjectParent(mat,A);
1131 
1132     /* copy over the A part */
1133     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1134     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1135     ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1136 
1137     for (i=0; i<mbs; i++) {
1138       rvals[0] = bs*(baij->rstart + i);
1139       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1140       for (j=ai[i]; j<ai[i+1]; j++) {
1141         col = (baij->cstart+aj[j])*bs;
1142         for (k=0; k<bs; k++) {
1143           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1144           col++; a += bs;
1145         }
1146       }
1147     }
1148     /* copy over the B part */
1149     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1150     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1151     for (i=0; i<mbs; i++) {
1152       rvals[0] = bs*(baij->rstart + i);
1153       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1154       for (j=ai[i]; j<ai[i+1]; j++) {
1155         col = baij->garray[aj[j]]*bs;
1156         for (k=0; k<bs; k++) {
1157           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1158           col++; a += bs;
1159         }
1160       }
1161     }
1162     ierr = PetscFree(rvals);CHKERRQ(ierr);
1163     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1164     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1165     /*
1166        Everyone has to call to draw the matrix since the graphics waits are
1167        synchronized across all processors that share the PetscDraw object
1168     */
1169     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1170     if (!rank) {
1171       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1172       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1173     }
1174     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1175     ierr = MatDestroy(A);CHKERRQ(ierr);
1176   }
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatView_MPIBAIJ"
1182 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1183 {
1184   int        ierr;
1185   PetscTruth isascii,isdraw,issocket,isbinary;
1186 
1187   PetscFunctionBegin;
1188   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1189   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1190   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1191   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1192   if (isascii || isdraw || issocket || isbinary) {
1193     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1194   } else {
1195     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1196   }
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 #undef __FUNCT__
1201 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1202 int MatDestroy_MPIBAIJ(Mat mat)
1203 {
1204   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1205   int         ierr;
1206 
1207   PetscFunctionBegin;
1208 #if defined(PETSC_USE_LOG)
1209   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
1210 #endif
1211   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1212   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1213   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1214   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1215   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1216 #if defined (PETSC_USE_CTABLE)
1217   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1218 #else
1219   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1220 #endif
1221   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1222   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1223   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1224   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1225   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1226   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1227 #if defined(PETSC_USE_MAT_SINGLE)
1228   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1229 #endif
1230   ierr = PetscFree(baij);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatMult_MPIBAIJ"
1236 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1237 {
1238   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1239   int         ierr,nt;
1240 
1241   PetscFunctionBegin;
1242   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1243   if (nt != A->n) {
1244     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1245   }
1246   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1247   if (nt != A->m) {
1248     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1249   }
1250   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1251   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1252   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1253   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1254   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1260 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1261 {
1262   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1263   int        ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1267   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1268   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1269   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1275 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1276 {
1277   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1278   int         ierr;
1279 
1280   PetscFunctionBegin;
1281   /* do nondiagonal part */
1282   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1283   /* send it on its way */
1284   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1285   /* do local part */
1286   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1287   /* receive remote parts: note this assumes the values are not actually */
1288   /* inserted in yy until the next line, which is true for my implementation*/
1289   /* but is not perhaps always true. */
1290   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1296 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1297 {
1298   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1299   int         ierr;
1300 
1301   PetscFunctionBegin;
1302   /* do nondiagonal part */
1303   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1304   /* send it on its way */
1305   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1306   /* do local part */
1307   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1308   /* receive remote parts: note this assumes the values are not actually */
1309   /* inserted in yy until the next line, which is true for my implementation*/
1310   /* but is not perhaps always true. */
1311   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 /*
1316   This only works correctly for square matrices where the subblock A->A is the
1317    diagonal block
1318 */
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1321 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1322 {
1323   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1324   int         ierr;
1325 
1326   PetscFunctionBegin;
1327   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1328   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatScale_MPIBAIJ"
1334 int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1335 {
1336   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1337   int         ierr;
1338 
1339   PetscFunctionBegin;
1340   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1341   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1347 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1348 {
1349   Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
1350   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1351   int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1352   int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1353   int          *cmap,*idx_p,cstart = mat->cstart;
1354 
1355   PetscFunctionBegin;
1356   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1357   mat->getrowactive = PETSC_TRUE;
1358 
1359   if (!mat->rowvalues && (idx || v)) {
1360     /*
1361         allocate enough space to hold information from the longest row.
1362     */
1363     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1364     int     max = 1,mbs = mat->mbs,tmp;
1365     for (i=0; i<mbs; i++) {
1366       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1367       if (max < tmp) { max = tmp; }
1368     }
1369     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1370     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1371   }
1372 
1373   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1374   lrow = row - brstart;
1375 
1376   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1377   if (!v)   {pvA = 0; pvB = 0;}
1378   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1379   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1380   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1381   nztot = nzA + nzB;
1382 
1383   cmap  = mat->garray;
1384   if (v  || idx) {
1385     if (nztot) {
1386       /* Sort by increasing column numbers, assuming A and B already sorted */
1387       int imark = -1;
1388       if (v) {
1389         *v = v_p = mat->rowvalues;
1390         for (i=0; i<nzB; i++) {
1391           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1392           else break;
1393         }
1394         imark = i;
1395         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1396         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1397       }
1398       if (idx) {
1399         *idx = idx_p = mat->rowindices;
1400         if (imark > -1) {
1401           for (i=0; i<imark; i++) {
1402             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1403           }
1404         } else {
1405           for (i=0; i<nzB; i++) {
1406             if (cmap[cworkB[i]/bs] < cstart)
1407               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1408             else break;
1409           }
1410           imark = i;
1411         }
1412         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1413         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1414       }
1415     } else {
1416       if (idx) *idx = 0;
1417       if (v)   *v   = 0;
1418     }
1419   }
1420   *nz = nztot;
1421   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1422   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1428 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1429 {
1430   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1431 
1432   PetscFunctionBegin;
1433   if (baij->getrowactive == PETSC_FALSE) {
1434     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1435   }
1436   baij->getrowactive = PETSC_FALSE;
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1442 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1443 {
1444   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1445 
1446   PetscFunctionBegin;
1447   *bs = baij->bs;
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1453 int MatZeroEntries_MPIBAIJ(Mat A)
1454 {
1455   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1456   int         ierr;
1457 
1458   PetscFunctionBegin;
1459   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1460   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1466 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1467 {
1468   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1469   Mat         A = a->A,B = a->B;
1470   int         ierr;
1471   PetscReal   isend[5],irecv[5];
1472 
1473   PetscFunctionBegin;
1474   info->block_size     = (PetscReal)a->bs;
1475   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1476   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1477   isend[3] = info->memory;  isend[4] = info->mallocs;
1478   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1479   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1480   isend[3] += info->memory;  isend[4] += info->mallocs;
1481   if (flag == MAT_LOCAL) {
1482     info->nz_used      = isend[0];
1483     info->nz_allocated = isend[1];
1484     info->nz_unneeded  = isend[2];
1485     info->memory       = isend[3];
1486     info->mallocs      = isend[4];
1487   } else if (flag == MAT_GLOBAL_MAX) {
1488     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1489     info->nz_used      = irecv[0];
1490     info->nz_allocated = irecv[1];
1491     info->nz_unneeded  = irecv[2];
1492     info->memory       = irecv[3];
1493     info->mallocs      = irecv[4];
1494   } else if (flag == MAT_GLOBAL_SUM) {
1495     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1496     info->nz_used      = irecv[0];
1497     info->nz_allocated = irecv[1];
1498     info->nz_unneeded  = irecv[2];
1499     info->memory       = irecv[3];
1500     info->mallocs      = irecv[4];
1501   } else {
1502     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1503   }
1504   info->rows_global       = (PetscReal)A->M;
1505   info->columns_global    = (PetscReal)A->N;
1506   info->rows_local        = (PetscReal)A->m;
1507   info->columns_local     = (PetscReal)A->N;
1508   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1509   info->fill_ratio_needed = 0;
1510   info->factor_mallocs    = 0;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1516 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1517 {
1518   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1519   int         ierr;
1520 
1521   PetscFunctionBegin;
1522   switch (op) {
1523   case MAT_NO_NEW_NONZERO_LOCATIONS:
1524   case MAT_YES_NEW_NONZERO_LOCATIONS:
1525   case MAT_COLUMNS_UNSORTED:
1526   case MAT_COLUMNS_SORTED:
1527   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1528   case MAT_KEEP_ZEROED_ROWS:
1529   case MAT_NEW_NONZERO_LOCATION_ERR:
1530     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1531     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1532     break;
1533   case MAT_ROW_ORIENTED:
1534     a->roworiented = PETSC_TRUE;
1535     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1536     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1537     break;
1538   case MAT_ROWS_SORTED:
1539   case MAT_ROWS_UNSORTED:
1540   case MAT_YES_NEW_DIAGONALS:
1541     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1542     break;
1543   case MAT_COLUMN_ORIENTED:
1544     a->roworiented = PETSC_FALSE;
1545     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1546     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1547     break;
1548   case MAT_IGNORE_OFF_PROC_ENTRIES:
1549     a->donotstash = PETSC_TRUE;
1550     break;
1551   case MAT_NO_NEW_DIAGONALS:
1552     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1553   case MAT_USE_HASH_TABLE:
1554     a->ht_flag = PETSC_TRUE;
1555     break;
1556   case MAT_SYMMETRIC:
1557   case MAT_STRUCTURALLY_SYMMETRIC:
1558   case MAT_NOT_SYMMETRIC:
1559   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1560   case MAT_HERMITIAN:
1561   case MAT_NOT_HERMITIAN:
1562   case MAT_SYMMETRY_ETERNAL:
1563   case MAT_NOT_SYMMETRY_ETERNAL:
1564     break;
1565   default:
1566     SETERRQ(PETSC_ERR_SUP,"unknown option");
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1573 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1574 {
1575   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1576   Mat_SeqBAIJ *Aloc;
1577   Mat         B;
1578   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1579   int         bs=baij->bs,mbs=baij->mbs;
1580   MatScalar   *a;
1581 
1582   PetscFunctionBegin;
1583   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1584   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1585 
1586   /* copy over the A part */
1587   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1588   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1589   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1590 
1591   for (i=0; i<mbs; i++) {
1592     rvals[0] = bs*(baij->rstart + i);
1593     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1594     for (j=ai[i]; j<ai[i+1]; j++) {
1595       col = (baij->cstart+aj[j])*bs;
1596       for (k=0; k<bs; k++) {
1597         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1598         col++; a += bs;
1599       }
1600     }
1601   }
1602   /* copy over the B part */
1603   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1604   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1605   for (i=0; i<mbs; i++) {
1606     rvals[0] = bs*(baij->rstart + i);
1607     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1608     for (j=ai[i]; j<ai[i+1]; j++) {
1609       col = baij->garray[aj[j]]*bs;
1610       for (k=0; k<bs; k++) {
1611         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1612         col++; a += bs;
1613       }
1614     }
1615   }
1616   ierr = PetscFree(rvals);CHKERRQ(ierr);
1617   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1618   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1619 
1620   if (matout) {
1621     *matout = B;
1622   } else {
1623     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1624   }
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1630 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1631 {
1632   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1633   Mat         a = baij->A,b = baij->B;
1634   int         ierr,s1,s2,s3;
1635 
1636   PetscFunctionBegin;
1637   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1638   if (rr) {
1639     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1640     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1641     /* Overlap communication with computation. */
1642     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1643   }
1644   if (ll) {
1645     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1646     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1647     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1648   }
1649   /* scale  the diagonal block */
1650   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1651 
1652   if (rr) {
1653     /* Do a scatter end and then right scale the off-diagonal block */
1654     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1655     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1656   }
1657 
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1663 int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1664 {
1665   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1666   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1667   int            *nprocs,j,idx,nsends,row;
1668   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1669   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1670   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1671   MPI_Comm       comm = A->comm;
1672   MPI_Request    *send_waits,*recv_waits;
1673   MPI_Status     recv_status,*send_status;
1674   IS             istmp;
1675   PetscTruth     found;
1676 
1677   PetscFunctionBegin;
1678   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1679   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1680 
1681   /*  first count number of contributors to each processor */
1682   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1683   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1684   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1685   for (i=0; i<N; i++) {
1686     idx   = rows[i];
1687     found = PETSC_FALSE;
1688     for (j=0; j<size; j++) {
1689       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1690         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1691       }
1692     }
1693     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1694   }
1695   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1696 
1697   /* inform other processors of number of messages and max length*/
1698   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1699 
1700   /* post receives:   */
1701   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1702   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1703   for (i=0; i<nrecvs; i++) {
1704     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1705   }
1706 
1707   /* do sends:
1708      1) starts[i] gives the starting index in svalues for stuff going to
1709      the ith processor
1710   */
1711   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1712   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1713   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1714   starts[0]  = 0;
1715   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1716   for (i=0; i<N; i++) {
1717     svalues[starts[owner[i]]++] = rows[i];
1718   }
1719   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1720 
1721   starts[0] = 0;
1722   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1723   count = 0;
1724   for (i=0; i<size; i++) {
1725     if (nprocs[2*i+1]) {
1726       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1727     }
1728   }
1729   ierr = PetscFree(starts);CHKERRQ(ierr);
1730 
1731   base = owners[rank]*bs;
1732 
1733   /*  wait on receives */
1734   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1735   source = lens + nrecvs;
1736   count  = nrecvs; slen = 0;
1737   while (count) {
1738     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1739     /* unpack receives into our local space */
1740     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1741     source[imdex]  = recv_status.MPI_SOURCE;
1742     lens[imdex]    = n;
1743     slen          += n;
1744     count--;
1745   }
1746   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1747 
1748   /* move the data into the send scatter */
1749   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1750   count = 0;
1751   for (i=0; i<nrecvs; i++) {
1752     values = rvalues + i*nmax;
1753     for (j=0; j<lens[i]; j++) {
1754       lrows[count++] = values[j] - base;
1755     }
1756   }
1757   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1758   ierr = PetscFree(lens);CHKERRQ(ierr);
1759   ierr = PetscFree(owner);CHKERRQ(ierr);
1760   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1761 
1762   /* actually zap the local rows */
1763   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1764   PetscLogObjectParent(A,istmp);
1765 
1766   /*
1767         Zero the required rows. If the "diagonal block" of the matrix
1768      is square and the user wishes to set the diagonal we use seperate
1769      code so that MatSetValues() is not called for each diagonal allocating
1770      new memory, thus calling lots of mallocs and slowing things down.
1771 
1772        Contributed by: Mathew Knepley
1773   */
1774   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1775   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1776   if (diag && (l->A->M == l->A->N)) {
1777     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1778   } else if (diag) {
1779     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1780     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1781       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1782 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1783     }
1784     for (i=0; i<slen; i++) {
1785       row  = lrows[i] + rstart_bs;
1786       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1787     }
1788     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1789     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1790   } else {
1791     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1792   }
1793 
1794   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1795   ierr = PetscFree(lrows);CHKERRQ(ierr);
1796 
1797   /* wait on sends */
1798   if (nsends) {
1799     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1800     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1801     ierr = PetscFree(send_status);CHKERRQ(ierr);
1802   }
1803   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1804   ierr = PetscFree(svalues);CHKERRQ(ierr);
1805 
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1811 int MatPrintHelp_MPIBAIJ(Mat A)
1812 {
1813   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1814   MPI_Comm    comm = A->comm;
1815   static int  called = 0;
1816   int         ierr;
1817 
1818   PetscFunctionBegin;
1819   if (!a->rank) {
1820     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1821   }
1822   if (called) {PetscFunctionReturn(0);} else called = 1;
1823   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1824   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1830 int MatSetUnfactored_MPIBAIJ(Mat A)
1831 {
1832   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1833   int         ierr;
1834 
1835   PetscFunctionBegin;
1836   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatEqual_MPIBAIJ"
1844 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1845 {
1846   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1847   Mat         a,b,c,d;
1848   PetscTruth  flg;
1849   int         ierr;
1850 
1851   PetscFunctionBegin;
1852   a = matA->A; b = matA->B;
1853   c = matB->A; d = matB->B;
1854 
1855   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1856   if (flg == PETSC_TRUE) {
1857     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1858   }
1859   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1866 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1867 {
1868   int        ierr;
1869 
1870   PetscFunctionBegin;
1871   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 /* -------------------------------------------------------------------*/
1876 static struct _MatOps MatOps_Values = {
1877        MatSetValues_MPIBAIJ,
1878        MatGetRow_MPIBAIJ,
1879        MatRestoreRow_MPIBAIJ,
1880        MatMult_MPIBAIJ,
1881 /* 4*/ MatMultAdd_MPIBAIJ,
1882        MatMultTranspose_MPIBAIJ,
1883        MatMultTransposeAdd_MPIBAIJ,
1884        0,
1885        0,
1886        0,
1887 /*10*/ 0,
1888        0,
1889        0,
1890        0,
1891        MatTranspose_MPIBAIJ,
1892 /*15*/ MatGetInfo_MPIBAIJ,
1893        MatEqual_MPIBAIJ,
1894        MatGetDiagonal_MPIBAIJ,
1895        MatDiagonalScale_MPIBAIJ,
1896        MatNorm_MPIBAIJ,
1897 /*20*/ MatAssemblyBegin_MPIBAIJ,
1898        MatAssemblyEnd_MPIBAIJ,
1899        0,
1900        MatSetOption_MPIBAIJ,
1901        MatZeroEntries_MPIBAIJ,
1902 /*25*/ MatZeroRows_MPIBAIJ,
1903        0,
1904        0,
1905        0,
1906        0,
1907 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1908        0,
1909        0,
1910        0,
1911        0,
1912 /*35*/ MatDuplicate_MPIBAIJ,
1913        0,
1914        0,
1915        0,
1916        0,
1917 /*40*/ 0,
1918        MatGetSubMatrices_MPIBAIJ,
1919        MatIncreaseOverlap_MPIBAIJ,
1920        MatGetValues_MPIBAIJ,
1921        0,
1922 /*45*/ MatPrintHelp_MPIBAIJ,
1923        MatScale_MPIBAIJ,
1924        0,
1925        0,
1926        0,
1927 /*50*/ MatGetBlockSize_MPIBAIJ,
1928        0,
1929        0,
1930        0,
1931        0,
1932 /*55*/ 0,
1933        0,
1934        MatSetUnfactored_MPIBAIJ,
1935        0,
1936        MatSetValuesBlocked_MPIBAIJ,
1937 /*60*/ 0,
1938        MatDestroy_MPIBAIJ,
1939        MatView_MPIBAIJ,
1940        MatGetPetscMaps_Petsc,
1941        0,
1942 /*65*/ 0,
1943        0,
1944        0,
1945        0,
1946        0,
1947 /*70*/ MatGetRowMax_MPIBAIJ,
1948        0,
1949        0,
1950        0,
1951        0,
1952 /*75*/ 0,
1953        0,
1954        0,
1955        0,
1956        0,
1957 /*80*/ 0,
1958        0,
1959        0,
1960        0,
1961        0,
1962 /*85*/ MatLoad_MPIBAIJ
1963 };
1964 
1965 
1966 EXTERN_C_BEGIN
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1969 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1970 {
1971   PetscFunctionBegin;
1972   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1973   *iscopy = PETSC_FALSE;
1974   PetscFunctionReturn(0);
1975 }
1976 EXTERN_C_END
1977 
1978 EXTERN_C_BEGIN
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1981 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1982 {
1983   Mat_MPIBAIJ  *b;
1984   int          ierr,i;
1985 
1986   PetscFunctionBegin;
1987   B->preallocated = PETSC_TRUE;
1988   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1989 
1990   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1991   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1992   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1993   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1994   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1995   if (d_nnz) {
1996   for (i=0; i<B->m/bs; i++) {
1997       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1998     }
1999   }
2000   if (o_nnz) {
2001     for (i=0; i<B->m/bs; i++) {
2002       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2003     }
2004   }
2005 
2006   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2007   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2008   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2009   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2010 
2011   b = (Mat_MPIBAIJ*)B->data;
2012   b->bs  = bs;
2013   b->bs2 = bs*bs;
2014   b->mbs = B->m/bs;
2015   b->nbs = B->n/bs;
2016   b->Mbs = B->M/bs;
2017   b->Nbs = B->N/bs;
2018 
2019   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2020   b->rowners[0]    = 0;
2021   for (i=2; i<=b->size; i++) {
2022     b->rowners[i] += b->rowners[i-1];
2023   }
2024   b->rstart    = b->rowners[b->rank];
2025   b->rend      = b->rowners[b->rank+1];
2026 
2027   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2028   b->cowners[0] = 0;
2029   for (i=2; i<=b->size; i++) {
2030     b->cowners[i] += b->cowners[i-1];
2031   }
2032   b->cstart    = b->cowners[b->rank];
2033   b->cend      = b->cowners[b->rank+1];
2034 
2035   for (i=0; i<=b->size; i++) {
2036     b->rowners_bs[i] = b->rowners[i]*bs;
2037   }
2038   b->rstart_bs = b->rstart*bs;
2039   b->rend_bs   = b->rend*bs;
2040   b->cstart_bs = b->cstart*bs;
2041   b->cend_bs   = b->cend*bs;
2042 
2043   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2044   PetscLogObjectParent(B,b->A);
2045   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2046   PetscLogObjectParent(B,b->B);
2047   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2048 
2049   PetscFunctionReturn(0);
2050 }
2051 EXTERN_C_END
2052 
2053 EXTERN_C_BEGIN
2054 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2055 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2056 EXTERN_C_END
2057 
2058 /*MC
2059    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2060 
2061    Options Database Keys:
2062 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2063 
2064   Level: beginner
2065 
2066 .seealso: MatCreateMPIBAIJ
2067 M*/
2068 
2069 EXTERN_C_BEGIN
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatCreate_MPIBAIJ"
2072 int MatCreate_MPIBAIJ(Mat B)
2073 {
2074   Mat_MPIBAIJ  *b;
2075   int          ierr;
2076   PetscTruth   flg;
2077 
2078   PetscFunctionBegin;
2079 
2080   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2081   B->data = (void*)b;
2082 
2083   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2084   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2085   B->mapping    = 0;
2086   B->factor     = 0;
2087   B->assembled  = PETSC_FALSE;
2088 
2089   B->insertmode = NOT_SET_VALUES;
2090   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2091   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2092 
2093   /* build local table of row and column ownerships */
2094   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
2095   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2096   b->cowners    = b->rowners + b->size + 2;
2097   b->rowners_bs = b->cowners + b->size + 2;
2098 
2099   /* build cache for off array entries formed */
2100   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2101   b->donotstash  = PETSC_FALSE;
2102   b->colmap      = PETSC_NULL;
2103   b->garray      = PETSC_NULL;
2104   b->roworiented = PETSC_TRUE;
2105 
2106 #if defined(PETSC_USE_MAT_SINGLE)
2107   /* stuff for MatSetValues_XXX in single precision */
2108   b->setvalueslen     = 0;
2109   b->setvaluescopy    = PETSC_NULL;
2110 #endif
2111 
2112   /* stuff used in block assembly */
2113   b->barray       = 0;
2114 
2115   /* stuff used for matrix vector multiply */
2116   b->lvec         = 0;
2117   b->Mvctx        = 0;
2118 
2119   /* stuff for MatGetRow() */
2120   b->rowindices   = 0;
2121   b->rowvalues    = 0;
2122   b->getrowactive = PETSC_FALSE;
2123 
2124   /* hash table stuff */
2125   b->ht           = 0;
2126   b->hd           = 0;
2127   b->ht_size      = 0;
2128   b->ht_flag      = PETSC_FALSE;
2129   b->ht_fact      = 0;
2130   b->ht_total_ct  = 0;
2131   b->ht_insert_ct = 0;
2132 
2133   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2134   if (flg) {
2135     PetscReal fact = 1.39;
2136     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2137     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2138     if (fact <= 1.0) fact = 1.39;
2139     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2140     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2141   }
2142   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2143                                      "MatStoreValues_MPIBAIJ",
2144                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2145   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2146                                      "MatRetrieveValues_MPIBAIJ",
2147                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2148   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2149                                      "MatGetDiagonalBlock_MPIBAIJ",
2150                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2151   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2152                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2153                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2154   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2155                                      "MatDiagonalScaleLocal_MPIBAIJ",
2156                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2157   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2158                                      "MatSetHashTableFactor_MPIBAIJ",
2159                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 EXTERN_C_END
2163 
2164 /*MC
2165    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2166 
2167    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2168    and MATMPIBAIJ otherwise.
2169 
2170    Options Database Keys:
2171 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2172 
2173   Level: beginner
2174 
2175 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2176 M*/
2177 
2178 EXTERN_C_BEGIN
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatCreate_BAIJ"
2181 int MatCreate_BAIJ(Mat A) {
2182   int ierr,size;
2183 
2184   PetscFunctionBegin;
2185   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2186   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2187   if (size == 1) {
2188     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2189   } else {
2190     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2191   }
2192   PetscFunctionReturn(0);
2193 }
2194 EXTERN_C_END
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2198 /*@C
2199    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2200    (block compressed row).  For good matrix assembly performance
2201    the user should preallocate the matrix storage by setting the parameters
2202    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2203    performance can be increased by more than a factor of 50.
2204 
2205    Collective on Mat
2206 
2207    Input Parameters:
2208 +  A - the matrix
2209 .  bs   - size of blockk
2210 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2211            submatrix  (same for all local rows)
2212 .  d_nnz - array containing the number of block nonzeros in the various block rows
2213            of the in diagonal portion of the local (possibly different for each block
2214            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2215 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2216            submatrix (same for all local rows).
2217 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2218            off-diagonal portion of the local submatrix (possibly different for
2219            each block row) or PETSC_NULL.
2220 
2221    Output Parameter:
2222 
2223 
2224    Options Database Keys:
2225 .   -mat_no_unroll - uses code that does not unroll the loops in the
2226                      block calculations (much slower)
2227 .   -mat_block_size - size of the blocks to use
2228 
2229    Notes:
2230    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2231    than it must be used on all processors that share the object for that argument.
2232 
2233    Storage Information:
2234    For a square global matrix we define each processor's diagonal portion
2235    to be its local rows and the corresponding columns (a square submatrix);
2236    each processor's off-diagonal portion encompasses the remainder of the
2237    local matrix (a rectangular submatrix).
2238 
2239    The user can specify preallocated storage for the diagonal part of
2240    the local submatrix with either d_nz or d_nnz (not both).  Set
2241    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2242    memory allocation.  Likewise, specify preallocated storage for the
2243    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2244 
2245    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2246    the figure below we depict these three local rows and all columns (0-11).
2247 
2248 .vb
2249            0 1 2 3 4 5 6 7 8 9 10 11
2250           -------------------
2251    row 3  |  o o o d d d o o o o o o
2252    row 4  |  o o o d d d o o o o o o
2253    row 5  |  o o o d d d o o o o o o
2254           -------------------
2255 .ve
2256 
2257    Thus, any entries in the d locations are stored in the d (diagonal)
2258    submatrix, and any entries in the o locations are stored in the
2259    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2260    stored simply in the MATSEQBAIJ format for compressed row storage.
2261 
2262    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2263    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2264    In general, for PDE problems in which most nonzeros are near the diagonal,
2265    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2266    or you will get TERRIBLE performance; see the users' manual chapter on
2267    matrices.
2268 
2269    Level: intermediate
2270 
2271 .keywords: matrix, block, aij, compressed row, sparse, parallel
2272 
2273 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2274 @*/
2275 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2276 {
2277   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2278 
2279   PetscFunctionBegin;
2280   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2281   if (f) {
2282     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2283   }
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatCreateMPIBAIJ"
2289 /*@C
2290    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2291    (block compressed row).  For good matrix assembly performance
2292    the user should preallocate the matrix storage by setting the parameters
2293    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2294    performance can be increased by more than a factor of 50.
2295 
2296    Collective on MPI_Comm
2297 
2298    Input Parameters:
2299 +  comm - MPI communicator
2300 .  bs   - size of blockk
2301 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2302            This value should be the same as the local size used in creating the
2303            y vector for the matrix-vector product y = Ax.
2304 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2305            This value should be the same as the local size used in creating the
2306            x vector for the matrix-vector product y = Ax.
2307 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2308 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2309 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2310            submatrix  (same for all local rows)
2311 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2312            of the in diagonal portion of the local (possibly different for each block
2313            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2314 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2315            submatrix (same for all local rows).
2316 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2317            off-diagonal portion of the local submatrix (possibly different for
2318            each block row) or PETSC_NULL.
2319 
2320    Output Parameter:
2321 .  A - the matrix
2322 
2323    Options Database Keys:
2324 .   -mat_no_unroll - uses code that does not unroll the loops in the
2325                      block calculations (much slower)
2326 .   -mat_block_size - size of the blocks to use
2327 
2328    Notes:
2329    A nonzero block is any block that as 1 or more nonzeros in it
2330 
2331    The user MUST specify either the local or global matrix dimensions
2332    (possibly both).
2333 
2334    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2335    than it must be used on all processors that share the object for that argument.
2336 
2337    Storage Information:
2338    For a square global matrix we define each processor's diagonal portion
2339    to be its local rows and the corresponding columns (a square submatrix);
2340    each processor's off-diagonal portion encompasses the remainder of the
2341    local matrix (a rectangular submatrix).
2342 
2343    The user can specify preallocated storage for the diagonal part of
2344    the local submatrix with either d_nz or d_nnz (not both).  Set
2345    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2346    memory allocation.  Likewise, specify preallocated storage for the
2347    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2348 
2349    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2350    the figure below we depict these three local rows and all columns (0-11).
2351 
2352 .vb
2353            0 1 2 3 4 5 6 7 8 9 10 11
2354           -------------------
2355    row 3  |  o o o d d d o o o o o o
2356    row 4  |  o o o d d d o o o o o o
2357    row 5  |  o o o d d d o o o o o o
2358           -------------------
2359 .ve
2360 
2361    Thus, any entries in the d locations are stored in the d (diagonal)
2362    submatrix, and any entries in the o locations are stored in the
2363    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2364    stored simply in the MATSEQBAIJ format for compressed row storage.
2365 
2366    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2367    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2368    In general, for PDE problems in which most nonzeros are near the diagonal,
2369    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2370    or you will get TERRIBLE performance; see the users' manual chapter on
2371    matrices.
2372 
2373    Level: intermediate
2374 
2375 .keywords: matrix, block, aij, compressed row, sparse, parallel
2376 
2377 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2378 @*/
2379 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2380 {
2381   int ierr,size;
2382 
2383   PetscFunctionBegin;
2384   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2385   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2386   if (size > 1) {
2387     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2388     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2389   } else {
2390     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2391     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2392   }
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2398 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2399 {
2400   Mat         mat;
2401   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2402   int         ierr,len=0;
2403 
2404   PetscFunctionBegin;
2405   *newmat       = 0;
2406   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2407   ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
2408   mat->preallocated = PETSC_TRUE;
2409   mat->assembled    = PETSC_TRUE;
2410   a      = (Mat_MPIBAIJ*)mat->data;
2411   a->bs  = oldmat->bs;
2412   a->bs2 = oldmat->bs2;
2413   a->mbs = oldmat->mbs;
2414   a->nbs = oldmat->nbs;
2415   a->Mbs = oldmat->Mbs;
2416   a->Nbs = oldmat->Nbs;
2417 
2418   a->rstart       = oldmat->rstart;
2419   a->rend         = oldmat->rend;
2420   a->cstart       = oldmat->cstart;
2421   a->cend         = oldmat->cend;
2422   a->size         = oldmat->size;
2423   a->rank         = oldmat->rank;
2424   a->donotstash   = oldmat->donotstash;
2425   a->roworiented  = oldmat->roworiented;
2426   a->rowindices   = 0;
2427   a->rowvalues    = 0;
2428   a->getrowactive = PETSC_FALSE;
2429   a->barray       = 0;
2430   a->rstart_bs    = oldmat->rstart_bs;
2431   a->rend_bs      = oldmat->rend_bs;
2432   a->cstart_bs    = oldmat->cstart_bs;
2433   a->cend_bs      = oldmat->cend_bs;
2434 
2435   /* hash table stuff */
2436   a->ht           = 0;
2437   a->hd           = 0;
2438   a->ht_size      = 0;
2439   a->ht_flag      = oldmat->ht_flag;
2440   a->ht_fact      = oldmat->ht_fact;
2441   a->ht_total_ct  = 0;
2442   a->ht_insert_ct = 0;
2443 
2444   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2445   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2446   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2447   if (oldmat->colmap) {
2448 #if defined (PETSC_USE_CTABLE)
2449   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2450 #else
2451   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2452   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2453   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2454 #endif
2455   } else a->colmap = 0;
2456   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2457     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2458     PetscLogObjectMemory(mat,len*sizeof(int));
2459     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2460   } else a->garray = 0;
2461 
2462   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2463   PetscLogObjectParent(mat,a->lvec);
2464   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2465 
2466   PetscLogObjectParent(mat,a->Mvctx);
2467   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2468   PetscLogObjectParent(mat,a->A);
2469   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2470   PetscLogObjectParent(mat,a->B);
2471   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2472   *newmat = mat;
2473   PetscFunctionReturn(0);
2474 }
2475 
2476 #include "petscsys.h"
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatLoad_MPIBAIJ"
2480 int MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2481 {
2482   Mat          A;
2483   int          i,nz,ierr,j,rstart,rend,fd;
2484   PetscScalar  *vals,*buf;
2485   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2486   MPI_Status   status;
2487   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2488   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2489   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2490   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2491   int          dcount,kmax,k,nzcount,tmp;
2492 
2493   PetscFunctionBegin;
2494   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2495 
2496   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2497   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2498   if (!rank) {
2499     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2500     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2501     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2502     if (header[3] < 0) {
2503       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2504     }
2505   }
2506 
2507   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2508   M = header[1]; N = header[2];
2509 
2510   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2511 
2512   /*
2513      This code adds extra rows to make sure the number of rows is
2514      divisible by the blocksize
2515   */
2516   Mbs        = M/bs;
2517   extra_rows = bs - M + bs*(Mbs);
2518   if (extra_rows == bs) extra_rows = 0;
2519   else                  Mbs++;
2520   if (extra_rows &&!rank) {
2521     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2522   }
2523 
2524   /* determine ownership of all rows */
2525   mbs        = Mbs/size + ((Mbs % size) > rank);
2526   m          = mbs*bs;
2527   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2528   browners   = rowners + size + 1;
2529   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2530   rowners[0] = 0;
2531   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2532   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2533   rstart = rowners[rank];
2534   rend   = rowners[rank+1];
2535 
2536   /* distribute row lengths to all processors */
2537   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2538   if (!rank) {
2539     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2540     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2541     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2542     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2543     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2544     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2545     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2546   } else {
2547     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2548   }
2549 
2550   if (!rank) {
2551     /* calculate the number of nonzeros on each processor */
2552     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2553     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2554     for (i=0; i<size; i++) {
2555       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2556         procsnz[i] += rowlengths[j];
2557       }
2558     }
2559     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2560 
2561     /* determine max buffer needed and allocate it */
2562     maxnz = 0;
2563     for (i=0; i<size; i++) {
2564       maxnz = PetscMax(maxnz,procsnz[i]);
2565     }
2566     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2567 
2568     /* read in my part of the matrix column indices  */
2569     nz     = procsnz[0];
2570     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2571     mycols = ibuf;
2572     if (size == 1)  nz -= extra_rows;
2573     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2574     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2575 
2576     /* read in every ones (except the last) and ship off */
2577     for (i=1; i<size-1; i++) {
2578       nz   = procsnz[i];
2579       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2580       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2581     }
2582     /* read in the stuff for the last proc */
2583     if (size != 1) {
2584       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2585       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2586       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2587       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2588     }
2589     ierr = PetscFree(cols);CHKERRQ(ierr);
2590   } else {
2591     /* determine buffer space needed for message */
2592     nz = 0;
2593     for (i=0; i<m; i++) {
2594       nz += locrowlens[i];
2595     }
2596     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2597     mycols = ibuf;
2598     /* receive message of column indices*/
2599     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2600     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2601     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2602   }
2603 
2604   /* loop over local rows, determining number of off diagonal entries */
2605   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2606   odlens   = dlens + (rend-rstart);
2607   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2608   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2609   masked1  = mask    + Mbs;
2610   masked2  = masked1 + Mbs;
2611   rowcount = 0; nzcount = 0;
2612   for (i=0; i<mbs; i++) {
2613     dcount  = 0;
2614     odcount = 0;
2615     for (j=0; j<bs; j++) {
2616       kmax = locrowlens[rowcount];
2617       for (k=0; k<kmax; k++) {
2618         tmp = mycols[nzcount++]/bs;
2619         if (!mask[tmp]) {
2620           mask[tmp] = 1;
2621           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2622           else masked1[dcount++] = tmp;
2623         }
2624       }
2625       rowcount++;
2626     }
2627 
2628     dlens[i]  = dcount;
2629     odlens[i] = odcount;
2630 
2631     /* zero out the mask elements we set */
2632     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2633     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2634   }
2635 
2636   /* create our matrix */
2637   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2638   ierr = MatSetType(A,type);CHKERRQ(ierr)
2639   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2640 
2641   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2642   MatSetOption(A,MAT_COLUMNS_SORTED);
2643 
2644   if (!rank) {
2645     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2646     /* read in my part of the matrix numerical values  */
2647     nz = procsnz[0];
2648     vals = buf;
2649     mycols = ibuf;
2650     if (size == 1)  nz -= extra_rows;
2651     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2652     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2653 
2654     /* insert into matrix */
2655     jj      = rstart*bs;
2656     for (i=0; i<m; i++) {
2657       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2658       mycols += locrowlens[i];
2659       vals   += locrowlens[i];
2660       jj++;
2661     }
2662     /* read in other processors (except the last one) and ship out */
2663     for (i=1; i<size-1; i++) {
2664       nz   = procsnz[i];
2665       vals = buf;
2666       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2667       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2668     }
2669     /* the last proc */
2670     if (size != 1){
2671       nz   = procsnz[i] - extra_rows;
2672       vals = buf;
2673       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2674       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2675       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2676     }
2677     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2678   } else {
2679     /* receive numeric values */
2680     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2681 
2682     /* receive message of values*/
2683     vals   = buf;
2684     mycols = ibuf;
2685     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2686     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2687     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2688 
2689     /* insert into matrix */
2690     jj      = rstart*bs;
2691     for (i=0; i<m; i++) {
2692       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2693       mycols += locrowlens[i];
2694       vals   += locrowlens[i];
2695       jj++;
2696     }
2697   }
2698   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2699   ierr = PetscFree(buf);CHKERRQ(ierr);
2700   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2701   ierr = PetscFree(rowners);CHKERRQ(ierr);
2702   ierr = PetscFree(dlens);CHKERRQ(ierr);
2703   ierr = PetscFree(mask);CHKERRQ(ierr);
2704   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2705   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2706 
2707   *newmat = A;
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2713 /*@
2714    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2715 
2716    Input Parameters:
2717 .  mat  - the matrix
2718 .  fact - factor
2719 
2720    Collective on Mat
2721 
2722    Level: advanced
2723 
2724   Notes:
2725    This can also be set by the command line option: -mat_use_hash_table fact
2726 
2727 .keywords: matrix, hashtable, factor, HT
2728 
2729 .seealso: MatSetOption()
2730 @*/
2731 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2732 {
2733   int ierr,(*f)(Mat,PetscReal);
2734 
2735   PetscFunctionBegin;
2736   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2737   if (f) {
2738     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2739   }
2740   PetscFunctionReturn(0);
2741 }
2742 
2743 #undef __FUNCT__
2744 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2745 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2746 {
2747   Mat_MPIBAIJ *baij;
2748 
2749   PetscFunctionBegin;
2750   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2751   baij = (Mat_MPIBAIJ*)mat->data;
2752   baij->ht_fact = fact;
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2758 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2759 {
2760   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2761   PetscFunctionBegin;
2762   *Ad     = a->A;
2763   *Ao     = a->B;
2764   *colmap = a->garray;
2765   PetscFunctionReturn(0);
2766 }
2767