/*$Id: mpibaij.c,v 1.217 2001/03/22 20:30:21 bsmith Exp bsmith $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "src/vec/vecimpl.h"

EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN int DisAssemble_MPIBAIJ(Mat);
EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatPrintHelp_SeqBAIJ(Mat);
EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*);

/*  UGLY, ugly, ugly
   When MatScalar == Scalar the separate ..._MatScalar() routines are not needed;
   they are #defined to be the plain routines. Otherwise ..._MatScalar() takes
   matrix elements already in single precision and inserts them into the single
   precision data structures, while the MatSetValues*_MPIBAIJ() routines first
   convert the (double precision) Scalar entries to single precision and then
   call ..._MatScalar() to put them into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
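
/*
   Sketch of the wrapper pattern used when PETSC_USE_MAT_SINGLE is defined
   (illustrative only; the real routines appear further below): each
   MatSetValues*_MPIBAIJ() wrapper grows a cached MatScalar buffer
   b->setvaluescopy as needed, copies (and thereby narrows) the N = m*n
   (or m*n*bs2 for the blocked versions) Scalar entries into it, and then
   calls the corresponding ..._MatScalar() routine with that buffer:

     if (N > b->setvalueslen) {
       if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
       ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
       b->setvalueslen = N;
     }
     for (i=0; i<N; i++) b->setvaluescopy[i] = v[i];
*/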

#undef __FUNC__
#define __FUNC__ "MatGetRowMax_MPIBAIJ"
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
  int          ierr,i;
  Scalar       *va,*vb;
  Vec          vtmp;

  PetscFunctionBegin;

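  /* row maxima of the diagonal block, then merged (by absolute value) with
     the row maxima of the off-diagonal block, computed into a sequential
     work vector */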
  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIBAIJ"
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIBAIJ"
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global number of block columns.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
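
/*
   Sketch of how colmap is consulted during MatSetValues() (illustrative
   only; the actual uses appear further below). Given a global column gcol,
   the owning block column gcol/bs is looked up and combined with the offset
   gcol%bs inside the block; a result < 0 means the block column is not
   (yet) present in the off-diagonal part B:

     col = baij->colmap[gcol/bs] - 1 + gcol%bs;
   or, with PETSC_USE_CTABLE,
     ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&col);CHKERRQ(ierr);
     col  = col - 1 + gcol%bs;
*/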

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into the matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
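
/*
   The two MatSetValues_SeqBAIJ_{A,B}_Private macros (above and below) insert
   one scalar into the diagonal block baij->A or the off-diagonal block baij->B:
   locate the block row, binary-search (finished by a short linear scan) for
   the block column among the sorted entries, and add/insert in place if the
   block already exists; otherwise, if the row is full, reallocate the whole
   matrix storage with CHUNKSIZE extra block slots for this row, then shift
   the trailing entries of the row up by one block and store the new block
   column and value.
*/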

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into the matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_MatScalar"
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required by the macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If only one block column (row-oriented) or one block row
           (column-oriented) is being set, a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
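/*
   The multiplicative hash above maps a key into [0,size) using the
   fractional part of key*HASH_KEY, where HASH_KEY is the golden ratio
   conjugate (Knuth's multiplicative hashing). A small worked example,
   assuming size = 13 and key = 7:

     double tmp;
     int    h = HASH(13,7,tmp);
     tmp = 7*0.6180339887 = 4.3262...   (fractional part 0.3262...)
     h   = (int)(13*0.3262...) = 4
*/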
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT_MatScalar"
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the entry in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up the block in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int        ierr,i,bs2=baij->bs2;
  PetscReal  sum = 0.0;
  MatScalar  *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}


/*
  Creates the hash table, and sets the table entries.
  This table is created only once. If new entries need to be added to the
  matrix, then the hash table has to be destroyed and recreated.
*/
#undef __FUNC__
#define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  baij->ht_size = (int)(factor*nz);
  size          = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j == 0) ? 0.0 : ((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}
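
/*
   Collisions in the table above are resolved by linear probing: starting
   from the hashed slot h1, the first empty slot (HT[] == 0) in a circular
   scan receives the key and a pointer to the bs2 values of the block.
   A minimal sketch of the insert step used for both A and B, where
   values_for_this_block stands for a->a + j*bs2 or b->a + j*bs2:

     key = row*Nbs + col + 1;           the +1 keeps every key nonzero
     h1  = HASH(size,key,tmp);
     for (k=0; k<size; k++) {
       if (!HT[(h1+k)%size]) {
         HT[(h1+k)%size] = key;
         HD[(h1+k)%size] = values_for_this_block;
         break;
       }
     }
*/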

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily set the roworiented flags to PETSC_FALSE, and restore the
       original flags after MatSetValues() is done */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
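
/*
   Typical calling sequence that the two assembly routines above support
   (standard PETSc usage, shown for orientation only):

     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Values set for rows owned by other processes are kept in mat->stash and
   mat->bstash by MatSetValues()/MatSetValuesBlocked() and are shipped to
   their owners between MatAssemblyBegin() and MatAssemblyEnd().
*/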

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
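  /* start communicating the needed off-process entries of xx, overlap the
     communication with the multiply by the diagonal block A, then apply
     the off-diagonal block B to the gathered ghost values in a->lvec */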
1244   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1245   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1246   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1247   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1248   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNC__
1253 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1254 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1255 {
1256   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1257   int        ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1261   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1262   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1263   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNC__
1268 #define __FUNC__ "MatMultTranspose_MPIBAIJ"
1269 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1270 {
1271   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1272   int         ierr;
1273 
1274   PetscFunctionBegin;
1275   /* do nondiagonal part */
1276   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1277   /* send it on its way */
1278   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1279   /* do local part */
1280   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1281   /* receive remote parts: note this assumes the values are not actually */
1282   /* inserted in yy until the next line, which is true for my implementation*/
1283   /* but is not perhaps always true. */
1284   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNC__
1289 #define __FUNC__ "MatMultTransposeAdd_MPIBAIJ"
1290 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1291 {
1292   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1293   int         ierr;
1294 
1295   PetscFunctionBegin;
1296   /* do nondiagonal part */
1297   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1298   /* send it on its way */
1299   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1300   /* do local part */
1301   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1302   /* receive remote parts: note this assumes the values are not actually */
1303   /* inserted in yy until the next line, which is true for my implementation*/
1304   /* but is not perhaps always true. */
1305   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 /*
1310   This only works correctly for square matrices where the subblock A->A is the
1311    diagonal block
1312 */
1313 #undef __FUNC__
1314 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1315 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1316 {
1317   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1318   int         ierr;
1319 
1320   PetscFunctionBegin;
1321   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1322   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNC__
1327 #define __FUNC__ "MatScale_MPIBAIJ"
1328 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1329 {
1330   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1331   int         ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1335   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNC__
1340 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1341 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1342 {
1343   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1344 
1345   PetscFunctionBegin;
1346   if (m) *m = mat->rstart_bs;
1347   if (n) *n = mat->rend_bs;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNC__
1352 #define __FUNC__ "MatGetRow_MPIBAIJ"
1353 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1354 {
1355   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1356   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1357   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1358   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1359   int        *cmap,*idx_p,cstart = mat->cstart;
1360 
1361   PetscFunctionBegin;
1362   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1363   mat->getrowactive = PETSC_TRUE;
1364 
1365   if (!mat->rowvalues && (idx || v)) {
1366     /*
1367         allocate enough space to hold information from the longest row.
1368     */
1369     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1370     int     max = 1,mbs = mat->mbs,tmp;
1371     for (i=0; i<mbs; i++) {
1372       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1373       if (max < tmp) { max = tmp; }
1374     }
1375     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr);
1376     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1377   }
1378 
1379   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1380   lrow = row - brstart;
1381 
1382   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1383   if (!v)   {pvA = 0; pvB = 0;}
1384   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1385   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1386   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1387   nztot = nzA + nzB;
1388 
1389   cmap  = mat->garray;
1390   if (v  || idx) {
1391     if (nztot) {
1392       /* Sort by increasing column numbers, assuming A and B already sorted */
1393       int imark = -1;
1394       if (v) {
1395         *v = v_p = mat->rowvalues;
1396         for (i=0; i<nzB; i++) {
1397           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1398           else break;
1399         }
1400         imark = i;
1401         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1402         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1403       }
1404       if (idx) {
1405         *idx = idx_p = mat->rowindices;
1406         if (imark > -1) {
1407           for (i=0; i<imark; i++) {
1408             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1409           }
1410         } else {
1411           for (i=0; i<nzB; i++) {
1412             if (cmap[cworkB[i]/bs] < cstart)
1413               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1414             else break;
1415           }
1416           imark = i;
1417         }
1418         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1419         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1420       }
1421     } else {
1422       if (idx) *idx = 0;
1423       if (v)   *v   = 0;
1424     }
1425   }
1426   *nz = nztot;
1427   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1428   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNC__
1433 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1434 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1435 {
1436   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1437 
1438   PetscFunctionBegin;
1439   if (baij->getrowactive == PETSC_FALSE) {
1440     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1441   }
1442   baij->getrowactive = PETSC_FALSE;
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNC__
1447 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1448 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1449 {
1450   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1451 
1452   PetscFunctionBegin;
1453   *bs = baij->bs;
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNC__
1458 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1459 int MatZeroEntries_MPIBAIJ(Mat A)
1460 {
1461   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1462   int         ierr;
1463 
1464   PetscFunctionBegin;
1465   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1466   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 #undef __FUNC__
1471 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1472 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1473 {
1474   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1475   Mat         A = a->A,B = a->B;
1476   int         ierr;
1477   PetscReal   isend[5],irecv[5];
1478 
1479   PetscFunctionBegin;
1480   info->block_size     = (double)a->bs;
1481   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1482   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1483   isend[3] = info->memory;  isend[4] = info->mallocs;
1484   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1485   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1486   isend[3] += info->memory;  isend[4] += info->mallocs;
1487   if (flag == MAT_LOCAL) {
1488     info->nz_used      = isend[0];
1489     info->nz_allocated = isend[1];
1490     info->nz_unneeded  = isend[2];
1491     info->memory       = isend[3];
1492     info->mallocs      = isend[4];
1493   } else if (flag == MAT_GLOBAL_MAX) {
1494     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1495     info->nz_used      = irecv[0];
1496     info->nz_allocated = irecv[1];
1497     info->nz_unneeded  = irecv[2];
1498     info->memory       = irecv[3];
1499     info->mallocs      = irecv[4];
1500   } else if (flag == MAT_GLOBAL_SUM) {
1501     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1502     info->nz_used      = irecv[0];
1503     info->nz_allocated = irecv[1];
1504     info->nz_unneeded  = irecv[2];
1505     info->memory       = irecv[3];
1506     info->mallocs      = irecv[4];
1507   } else {
1508     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",flag);
1509   }
1510   info->rows_global       = (double)A->M;
1511   info->columns_global    = (double)A->N;
1512   info->rows_local        = (double)A->m;
1513   info->columns_local     = (double)A->n;
1514   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1515   info->fill_ratio_needed = 0;
1516   info->factor_mallocs    = 0;
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNC__
1521 #define __FUNC__ "MatSetOption_MPIBAIJ"
1522 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1523 {
1524   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1525   int         ierr;
1526 
1527   PetscFunctionBegin;
1528   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1529       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1530       op == MAT_COLUMNS_UNSORTED ||
1531       op == MAT_COLUMNS_SORTED ||
1532       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1533       op == MAT_KEEP_ZEROED_ROWS ||
1534       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1535         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1536         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1537   } else if (op == MAT_ROW_ORIENTED) {
1538         a->roworiented = PETSC_TRUE;
1539         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1540         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1541   } else if (op == MAT_ROWS_SORTED ||
1542              op == MAT_ROWS_UNSORTED ||
1543              op == MAT_YES_NEW_DIAGONALS) {
1545     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1546   } else if (op == MAT_COLUMN_ORIENTED) {
1547     a->roworiented = PETSC_FALSE;
1548     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1549     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1550   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1551     a->donotstash = PETSC_TRUE;
1552   } else if (op == MAT_NO_NEW_DIAGONALS) {
1553     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1554   } else if (op == MAT_USE_HASH_TABLE) {
1555     a->ht_flag = PETSC_TRUE;
1556   } else {
1557     SETERRQ(PETSC_ERR_SUP,"unknown option");
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNC__
1563 #define __FUNC__ "MatTranspose_MPIBAIJ("
1564 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1565 {
1566   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1567   Mat_SeqBAIJ *Aloc;
1568   Mat         B;
1569   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1570   int         bs=baij->bs,mbs=baij->mbs;
1571   MatScalar   *a;
1572 
1573   PetscFunctionBegin;
1574   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1576 
1577   /* copy over the A part */
1578   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1579   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1580   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1581 
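  /* rvals[] holds the bs unblocked global row indices of block row i; in the transpose B they become the column indices at which each original column is inserted */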
1582   for (i=0; i<mbs; i++) {
1583     rvals[0] = bs*(baij->rstart + i);
1584     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1585     for (j=ai[i]; j<ai[i+1]; j++) {
1586       col = (baij->cstart+aj[j])*bs;
1587       for (k=0; k<bs; k++) {
1588         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1589         col++; a += bs;
1590       }
1591     }
1592   }
1593   /* copy over the B part */
1594   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1595   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1596   for (i=0; i<mbs; i++) {
1597     rvals[0] = bs*(baij->rstart + i);
1598     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1599     for (j=ai[i]; j<ai[i+1]; j++) {
1600       col = baij->garray[aj[j]]*bs;
1601       for (k=0; k<bs; k++) {
1602         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1603         col++; a += bs;
1604       }
1605     }
1606   }
1607   ierr = PetscFree(rvals);CHKERRQ(ierr);
1608   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1609   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1610 
1611   if (matout) {
1612     *matout = B;
1613   } else {
1614     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1615   }
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 #undef __FUNC__
1620 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1621 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1622 {
1623   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1624   Mat         a = baij->A,b = baij->B;
1625   int         ierr,s1,s2,s3;
1626 
1627   PetscFunctionBegin;
1628   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1629   if (rr) {
1630     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1631     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1632     /* Overlap communication with computation. */
1633     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1634   }
1635   if (ll) {
1636     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1637     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1638     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1639   }
1640   /* scale  the diagonal block */
1641   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1642 
1643   if (rr) {
1644     /* Do a scatter end and then right scale the off-diagonal block */
1645     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1646     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1647   }
1648 
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNC__
1653 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1654 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1655 {
1656   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1657   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1658   int            *procs,*nprocs,j,idx,nsends,*work,row;
1659   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1660   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1661   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1662   MPI_Comm       comm = A->comm;
1663   MPI_Request    *send_waits,*recv_waits;
1664   MPI_Status     recv_status,*send_status;
1665   IS             istmp;
1666   PetscTruth     found;
1667 
1668   PetscFunctionBegin;
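  /* scheme: determine the owner of every row to be zeroed, ship the row indices to their owners, then each process zeroes the local rows it received */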
1669   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1670   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1671 
1672   /*  first count number of contributors to each processor */
1673   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1674   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1675   procs = nprocs + size;
1676   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1677   for (i=0; i<N; i++) {
1678     idx   = rows[i];
1679     found = PETSC_FALSE;
1680     for (j=0; j<size; j++) {
1681       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1682         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1683       }
1684     }
1685     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1686   }
1687   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1688 
1689   /* inform other processors of number of messages and max length*/
1690   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
1691   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
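  /* PetscMaxSum_Op reduces elementwise: max over the first size entries, sum over the second half; hence work[rank] is the longest message we will receive and work[size+rank] the number of senders */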
1692   nmax   = work[rank];
1693   nrecvs = work[size+rank];
1694   ierr   = PetscFree(work);CHKERRQ(ierr);
1695 
1696   /* post receives:   */
1697   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1698   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1699   for (i=0; i<nrecvs; i++) {
1700     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1701   }
1702 
1703   /* do sends:
1704      1) starts[i] gives the starting index in svalues for stuff going to
1705      the ith processor
1706   */
1707   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1708   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1709   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1710   starts[0]  = 0;
1711   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1712   for (i=0; i<N; i++) {
1713     svalues[starts[owner[i]]++] = rows[i];
1714   }
1715   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1716 
1717   starts[0] = 0;
1718   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1719   count = 0;
1720   for (i=0; i<size; i++) {
1721     if (procs[i]) {
1722       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1723     }
1724   }
1725   ierr = PetscFree(starts);CHKERRQ(ierr);
1726 
1727   base = owners[rank]*bs;
1728 
1729   /*  wait on receives */
1730   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1731   source = lens + nrecvs;
1732   count  = nrecvs; slen = 0;
1733   while (count) {
1734     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1735     /* unpack receives into our local space */
1736     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1737     source[imdex]  = recv_status.MPI_SOURCE;
1738     lens[imdex]    = n;
1739     slen          += n;
1740     count--;
1741   }
1742   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1743 
1744   /* move the data into the send scatter */
1745   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1746   count = 0;
1747   for (i=0; i<nrecvs; i++) {
1748     values = rvalues + i*nmax;
1749     for (j=0; j<lens[i]; j++) {
1750       lrows[count++] = values[j] - base;
1751     }
1752   }
1753   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1754   ierr = PetscFree(lens);CHKERRQ(ierr);
1755   ierr = PetscFree(owner);CHKERRQ(ierr);
1756   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1757 
1758   /* actually zap the local rows */
1759   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1760   PetscLogObjectParent(A,istmp);
1761 
1762   /*
1763         Zero the required rows. If the "diagonal block" of the matrix
1764      is square and the user wishes to set the diagonal, we use separate
1765      code so that MatSetValues() is not called for each diagonal entry,
1766      which would repeatedly allocate new memory and slow things down with many mallocs.
1767 
1768        Contributed by: Matthew Knepley
1769   */
1770   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1771   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1772   if (diag && (l->A->M == l->A->N)) {
1773     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1774   } else if (diag) {
1775     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1776     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1777       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1778 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1779     }
1780     for (i=0; i<slen; i++) {
1781       row  = lrows[i] + rstart_bs;
1782       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1783     }
1784     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1785     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1786   } else {
1787     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1788   }
1789 
1790   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1791   ierr = PetscFree(lrows);CHKERRQ(ierr);
1792 
1793   /* wait on sends */
1794   if (nsends) {
1795     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1796     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1797     ierr = PetscFree(send_status);CHKERRQ(ierr);
1798   }
1799   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1800   ierr = PetscFree(svalues);CHKERRQ(ierr);
1801 
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 #undef __FUNC__
1806 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1807 int MatPrintHelp_MPIBAIJ(Mat A)
1808 {
1809   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1810   MPI_Comm    comm = A->comm;
1811   static int  called = 0;
1812   int         ierr;
1813 
1814   PetscFunctionBegin;
1815   if (!a->rank) {
1816     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1817   }
1818   if (called) {PetscFunctionReturn(0);} else called = 1;
1819   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1820   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNC__
1825 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1826 int MatSetUnfactored_MPIBAIJ(Mat A)
1827 {
1828   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1829   int         ierr;
1830 
1831   PetscFunctionBegin;
1832   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1837 
1838 #undef __FUNC__
1839 #define __FUNC__ "MatEqual_MPIBAIJ"
1840 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1841 {
1842   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1843   Mat         a,b,c,d;
1844   PetscTruth  flg;
1845   int         ierr;
1846 
1847   PetscFunctionBegin;
1848   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
1849   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1850   a = matA->A; b = matA->B;
1851   c = matB->A; d = matB->B;
1852 
1853   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1854   if (flg == PETSC_TRUE) {
1855     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1856   }
1857   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 
1862 #undef __FUNC__
1863 #define __FUNC__ "MatSetUpPreallocation_MPIBAIJ"
1864 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1865 {
1866   int        ierr;
1867 
1868   PetscFunctionBegin;
1869   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 /* -------------------------------------------------------------------*/
1874 static struct _MatOps MatOps_Values = {
1875   MatSetValues_MPIBAIJ,
1876   MatGetRow_MPIBAIJ,
1877   MatRestoreRow_MPIBAIJ,
1878   MatMult_MPIBAIJ,
1879   MatMultAdd_MPIBAIJ,
1880   MatMultTranspose_MPIBAIJ,
1881   MatMultTransposeAdd_MPIBAIJ,
1882   0,
1883   0,
1884   0,
1885   0,
1886   0,
1887   0,
1888   0,
1889   MatTranspose_MPIBAIJ,
1890   MatGetInfo_MPIBAIJ,
1891   MatEqual_MPIBAIJ,
1892   MatGetDiagonal_MPIBAIJ,
1893   MatDiagonalScale_MPIBAIJ,
1894   MatNorm_MPIBAIJ,
1895   MatAssemblyBegin_MPIBAIJ,
1896   MatAssemblyEnd_MPIBAIJ,
1897   0,
1898   MatSetOption_MPIBAIJ,
1899   MatZeroEntries_MPIBAIJ,
1900   MatZeroRows_MPIBAIJ,
1901   0,
1902   0,
1903   0,
1904   0,
1905   MatSetUpPreallocation_MPIBAIJ,
1906   0,
1907   MatGetOwnershipRange_MPIBAIJ,
1908   0,
1909   0,
1910   0,
1911   0,
1912   MatDuplicate_MPIBAIJ,
1913   0,
1914   0,
1915   0,
1916   0,
1917   0,
1918   MatGetSubMatrices_MPIBAIJ,
1919   MatIncreaseOverlap_MPIBAIJ,
1920   MatGetValues_MPIBAIJ,
1921   0,
1922   MatPrintHelp_MPIBAIJ,
1923   MatScale_MPIBAIJ,
1924   0,
1925   0,
1926   0,
1927   MatGetBlockSize_MPIBAIJ,
1928   0,
1929   0,
1930   0,
1931   0,
1932   0,
1933   0,
1934   MatSetUnfactored_MPIBAIJ,
1935   0,
1936   MatSetValuesBlocked_MPIBAIJ,
1937   0,
1938   MatDestroy_MPIBAIJ,
1939   MatView_MPIBAIJ,
1940   MatGetMaps_Petsc,
1941   0,
1942   0,
1943   0,
1944   0,
1945   0,
1946   0,
1947   MatGetRowMax_MPIBAIJ};
1948 
1949 
1950 EXTERN_C_BEGIN
1951 #undef __FUNC__
1952 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1953 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1954 {
1955   PetscFunctionBegin;
1956   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1957   *iscopy = PETSC_FALSE;
1958   PetscFunctionReturn(0);
1959 }
1960 EXTERN_C_END
1961 
1962 EXTERN_C_BEGIN
1963 #undef __FUNC__
1964 #define __FUNC__ "MatCreate_MPIBAIJ"
1965 int MatCreate_MPIBAIJ(Mat B)
1966 {
1967   Mat_MPIBAIJ  *b;
1968   int          ierr;
1969   PetscTruth   flg;
1970 
1971   PetscFunctionBegin;
1972 
1973   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
1974   B->data = (void*)b;
1975 
1976   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
1977   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1978   B->mapping    = 0;
1979   B->factor     = 0;
1980   B->assembled  = PETSC_FALSE;
1981 
1982   B->insertmode = NOT_SET_VALUES;
1983   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1984   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1985 
1986   /* build local table of row and column ownerships */
1987   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1988   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1989   b->cowners    = b->rowners + b->size + 2;
1990   b->rowners_bs = b->cowners + b->size + 2;
1991 
1992   /* build cache for off array entries formed */
1993   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1994   b->donotstash  = PETSC_FALSE;
1995   b->colmap      = PETSC_NULL;
1996   b->garray      = PETSC_NULL;
1997   b->roworiented = PETSC_TRUE;
1998 
1999 #if defined(PETSC_USE_MAT_SINGLE)
2000   /* stuff for MatSetValues_XXX in single precision */
2001   b->lensetvalues     = 0;
2002   b->setvaluescopy    = PETSC_NULL;
2003 #endif
2004 
2005   /* stuff used in block assembly */
2006   b->barray       = 0;
2007 
2008   /* stuff used for matrix vector multiply */
2009   b->lvec         = 0;
2010   b->Mvctx        = 0;
2011 
2012   /* stuff for MatGetRow() */
2013   b->rowindices   = 0;
2014   b->rowvalues    = 0;
2015   b->getrowactive = PETSC_FALSE;
2016 
2017   /* hash table stuff */
2018   b->ht           = 0;
2019   b->hd           = 0;
2020   b->ht_size      = 0;
2021   b->ht_flag      = PETSC_FALSE;
2022   b->ht_fact      = 0;
2023   b->ht_total_ct  = 0;
2024   b->ht_insert_ct = 0;
2025 
2026   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2027   if (flg) {
2028     double fact = 1.39;
2029     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2030     ierr = PetscOptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2031     if (fact <= 1.0) fact = 1.39;
2032     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2033     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2034   }
2035   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2036                                      "MatStoreValues_MPIBAIJ",
2037                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2038   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2039                                      "MatRetrieveValues_MPIBAIJ",
2040                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2041   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2042                                      "MatGetDiagonalBlock_MPIBAIJ",
2043                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 EXTERN_C_END
2047 
2048 #undef __FUNC__
2049 #define __FUNC__ "MatMPIBAIJSetPreallocation"
2050 /*@C
2051    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2052    (block compressed row).  For good matrix assembly performance
2053    the user should preallocate the matrix storage by setting the parameters
2054    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2055    performance can be increased by more than a factor of 50.
2056 
2057    Collective on Mat
2058 
2059    Input Parameters:
2060 +  A - the matrix
2061 .  bs   - size of block
2062 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2063            submatrix  (same for all local rows)
2064 .  d_nnz - array containing the number of block nonzeros in the various block rows
2065            in the diagonal portion of the local submatrix (possibly different for each
2066            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2067 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2068            submatrix (same for all local rows).
2069 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2070            off-diagonal portion of the local submatrix (possibly different for
2071            each block row) or PETSC_NULL.
2072 
2076    Options Database Keys:
2077 .   -mat_no_unroll - uses code that does not unroll the loops in the
2078                      block calculations (much slower)
2079 .   -mat_block_size - size of the blocks to use
2080 
2081    Notes:
2082    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2083    then it must be used on all processors that share the object for that argument.
2084 
2085    Storage Information:
2086    For a square global matrix we define each processor's diagonal portion
2087    to be its local rows and the corresponding columns (a square submatrix);
2088    each processor's off-diagonal portion encompasses the remainder of the
2089    local matrix (a rectangular submatrix).
2090 
2091    The user can specify preallocated storage for the diagonal part of
2092    the local submatrix with either d_nz or d_nnz (not both).  Set
2093    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2094    memory allocation.  Likewise, specify preallocated storage for the
2095    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2096 
2097    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2098    the figure below we depict these three local rows and all columns (0-11).
2099 
2100 .vb
2101            0 1 2 3 4 5 6 7 8 9 10 11
2102           -------------------
2103    row 3  |  o o o d d d o o o o o o
2104    row 4  |  o o o d d d o o o o o o
2105    row 5  |  o o o d d d o o o o o o
2106           -------------------
2107 .ve
2108 
2109    Thus, any entries in the d locations are stored in the d (diagonal)
2110    submatrix, and any entries in the o locations are stored in the
2111    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2112    stored simply in the MATSEQBAIJ format for compressed row storage.
2113 
2114    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2115    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2116    In general, for PDE problems in which most nonzeros are near the diagonal,
2117    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2118    or you will get TERRIBLE performance; see the users' manual chapter on
2119    matrices.
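
   As a concrete sketch, take the layout above with bs=1 (so blocks coincide
   with entries): the processor owning rows 3-5 has 3 block nonzeros per row
   in its diagonal part and 9 in its off-diagonal part, so it could
   preallocate with

.vb
      ierr = MatMPIBAIJSetPreallocation(A,1,3,PETSC_NULL,9,PETSC_NULL);CHKERRQ(ierr);
.ve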
2120 
2121    Level: intermediate
2122 
2123 .keywords: matrix, block, aij, compressed row, sparse, parallel
2124 
2125 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2126 @*/
2127 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2128 {
2129   Mat_MPIBAIJ  *b;
2130   int          ierr,i;
2131   PetscTruth   flg2;
2132 
2133   PetscFunctionBegin;
2134   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
2135   if (!flg2) PetscFunctionReturn(0);
2136 
2137   B->preallocated = PETSC_TRUE;
2138   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2139 
2140   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2141   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2142   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2143   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2144   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2145   if (d_nnz) {
2146     for (i=0; i<B->m/bs; i++) {
2147       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2148     }
2149   }
2150   if (o_nnz) {
2151     for (i=0; i<B->m/bs; i++) {
2152       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2153     }
2154   }
2155 
2156   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2157   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2158   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2159   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2160 
2161   b = (Mat_MPIBAIJ*)B->data;
2162   b->bs  = bs;
2163   b->bs2 = bs*bs;
2164   b->mbs = B->m/bs;
2165   b->nbs = B->n/bs;
2166   b->Mbs = B->M/bs;
2167   b->Nbs = B->N/bs;
2168 
2169   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2170   b->rowners[0]    = 0;
2171   for (i=2; i<=b->size; i++) {
2172     b->rowners[i] += b->rowners[i-1];
2173   }
2174   b->rstart    = b->rowners[b->rank];
2175   b->rend      = b->rowners[b->rank+1];
2176 
2177   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2178   b->cowners[0] = 0;
2179   for (i=2; i<=b->size; i++) {
2180     b->cowners[i] += b->cowners[i-1];
2181   }
2182   b->cstart    = b->cowners[b->rank];
2183   b->cend      = b->cowners[b->rank+1];
2184 
2185   for (i=0; i<=b->size; i++) {
2186     b->rowners_bs[i] = b->rowners[i]*bs;
2187   }
2188   b->rstart_bs = b->rstart*bs;
2189   b->rend_bs   = b->rend*bs;
2190   b->cstart_bs = b->cstart*bs;
2191   b->cend_bs   = b->cend*bs;
2192 
2193   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2194   PetscLogObjectParent(B,b->A);
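  /* the off-diagonal part b->B is created with the full global column width B->N; its columns are compacted to just the needed off-process blocks during assembly (see MatSetUpMultiply_MPIBAIJ) */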
2195   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2196   PetscLogObjectParent(B,b->B);
2197   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2198 
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 #undef __FUNC__
2203 #define __FUNC__ "MatCreateMPIBAIJ"
2204 /*@C
2205    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2206    (block compressed row).  For good matrix assembly performance
2207    the user should preallocate the matrix storage by setting the parameters
2208    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2209    performance can be increased by more than a factor of 50.
2210 
2211    Collective on MPI_Comm
2212 
2213    Input Parameters:
2214 +  comm - MPI communicator
2215 .  bs   - size of block
2216 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2217            This value should be the same as the local size used in creating the
2218            y vector for the matrix-vector product y = Ax.
2219 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2220            This value should be the same as the local size used in creating the
2221            x vector for the matrix-vector product y = Ax.
2222 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2223 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2224 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2225            submatrix  (same for all local rows)
2226 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2227            in the diagonal portion of the local submatrix (possibly different for each
2228            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2229 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2230            submatrix (same for all local rows).
2231 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2232            off-diagonal portion of the local submatrix (possibly different for
2233            each block row) or PETSC_NULL.
2234 
2235    Output Parameter:
2236 .  A - the matrix
2237 
2238    Options Database Keys:
2239 .   -mat_no_unroll - uses code that does not unroll the loops in the
2240                      block calculations (much slower)
2241 .   -mat_block_size - size of the blocks to use
2242 
2243    Notes:
2244    A nonzero block is any block that has 1 or more nonzeros in it.
2245 
2246    The user MUST specify either the local or global matrix dimensions
2247    (possibly both).
2248 
2249    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2250    then it must be used on all processors that share the object for that argument.
2251 
2252    Storage Information:
2253    For a square global matrix we define each processor's diagonal portion
2254    to be its local rows and the corresponding columns (a square submatrix);
2255    each processor's off-diagonal portion encompasses the remainder of the
2256    local matrix (a rectangular submatrix).
2257 
2258    The user can specify preallocated storage for the diagonal part of
2259    the local submatrix with either d_nz or d_nnz (not both).  Set
2260    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2261    memory allocation.  Likewise, specify preallocated storage for the
2262    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2263 
2264    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2265    the figure below we depict these three local rows and all columns (0-11).
2266 
2267 .vb
2268            0 1 2 3 4 5 6 7 8 9 10 11
2269           -------------------
2270    row 3  |  o o o d d d o o o o o o
2271    row 4  |  o o o d d d o o o o o o
2272    row 5  |  o o o d d d o o o o o o
2273           -------------------
2274 .ve
2275 
2276    Thus, any entries in the d locations are stored in the d (diagonal)
2277    submatrix, and any entries in the o locations are stored in the
2278    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2279    stored simply in the MATSEQBAIJ format for compressed row storage.
2280 
2281    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2282    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2283    In general, for PDE problems in which most nonzeros are near the diagonal,
2284    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2285    or you will get TERRIBLE performance; see the users' manual chapter on
2286    matrices.
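
   As an illustrative sketch (sizes hypothetical), the bs=1 matrix depicted
   above, with 12 global columns, could be created with each process passing
   its own local row count (3 for the process owning rows 3-5):

.vb
      ierr = MatCreateMPIBAIJ(comm,1,3,PETSC_DECIDE,PETSC_DETERMINE,12,3,PETSC_NULL,9,PETSC_NULL,&A);CHKERRQ(ierr);
.ve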
2287 
2288    Level: intermediate
2289 
2290 .keywords: matrix, block, aij, compressed row, sparse, parallel
2291 
2292 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation()
2293 @*/
2294 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2295 {
2296   int ierr,size;
2297 
2298   PetscFunctionBegin;
2299   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2300   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2301   if (size > 1) {
2302     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2303     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2304   } else {
2305     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2306     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2307   }
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 #undef __FUNC__
2312 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2313 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2314 {
2315   Mat         mat;
2316   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2317   int         ierr,len=0;
2318 
2319   PetscFunctionBegin;
2320   *newmat       = 0;
2321   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2322   ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
2323   mat->preallocated = PETSC_TRUE;
2324   mat->assembled    = PETSC_TRUE;
2325   a      = (Mat_MPIBAIJ*)mat->data;
2326   a->bs  = oldmat->bs;
2327   a->bs2 = oldmat->bs2;
2328   a->mbs = oldmat->mbs;
2329   a->nbs = oldmat->nbs;
2330   a->Mbs = oldmat->Mbs;
2331   a->Nbs = oldmat->Nbs;
2332 
2333   a->rstart       = oldmat->rstart;
2334   a->rend         = oldmat->rend;
2335   a->cstart       = oldmat->cstart;
2336   a->cend         = oldmat->cend;
2337   a->size         = oldmat->size;
2338   a->rank         = oldmat->rank;
2339   a->donotstash   = oldmat->donotstash;
2340   a->roworiented  = oldmat->roworiented;
2341   a->rowindices   = 0;
2342   a->rowvalues    = 0;
2343   a->getrowactive = PETSC_FALSE;
2344   a->barray       = 0;
2345   a->rstart_bs    = oldmat->rstart_bs;
2346   a->rend_bs      = oldmat->rend_bs;
2347   a->cstart_bs    = oldmat->cstart_bs;
2348   a->cend_bs      = oldmat->cend_bs;
2349 
2350   /* hash table stuff */
2351   a->ht           = 0;
2352   a->hd           = 0;
2353   a->ht_size      = 0;
2354   a->ht_flag      = oldmat->ht_flag;
2355   a->ht_fact      = oldmat->ht_fact;
2356   a->ht_total_ct  = 0;
2357   a->ht_insert_ct = 0;
2358 
2359   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2360   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2361   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2362   if (oldmat->colmap) {
2363 #if defined (PETSC_USE_CTABLE)
2364   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2365 #else
2366   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2367   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2368   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2369 #endif
2370   } else a->colmap = 0;
2371   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2372     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2373     PetscLogObjectMemory(mat,len*sizeof(int));
2374     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2375   } else a->garray = 0;
2376 
2377   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2378   PetscLogObjectParent(mat,a->lvec);
2379   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2380 
2381   PetscLogObjectParent(mat,a->Mvctx);
2382   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2383   PetscLogObjectParent(mat,a->A);
2384   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2385   PetscLogObjectParent(mat,a->B);
2386   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2387   *newmat = mat;
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #include "petscsys.h"
2392 
2393 EXTERN_C_BEGIN
2394 #undef __FUNC__
2395 #define __FUNC__ "MatLoad_MPIBAIJ"
2396 int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2397 {
2398   Mat          A;
2399   int          i,nz,ierr,j,rstart,rend,fd;
2400   Scalar       *vals,*buf;
2401   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2402   MPI_Status   status;
2403   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2404   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2405   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2406   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2407   int          dcount,kmax,k,nzcount,tmp;
2408 
2409   PetscFunctionBegin;
2410   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2411 
2412   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2413   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2414   if (!rank) {
2415     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2416     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2417     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2418     if (header[3] < 0) {
2419       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2420     }
2421   }
2422 
2423   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2424   M = header[1]; N = header[2];
2425 
2426   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2427 
2428   /*
2429      This code adds extra rows to make sure the number of rows is
2430      divisible by the blocksize
2431   */
2432   Mbs        = M/bs;
2433   extra_rows = bs - M + bs*(Mbs);
2434   if (extra_rows == bs) extra_rows = 0;
2435   else                  Mbs++;
2436   if (extra_rows && !rank) {
2437     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2438   }
2439 
2440   /* determine ownership of all rows */
2441   mbs        = Mbs/size + ((Mbs % size) > rank);
2442   m          = mbs*bs;
2443   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2444   browners   = rowners + size + 1;
2445   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2446   rowners[0] = 0;
2447   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2448   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2449   rstart = rowners[rank];
2450   rend   = rowners[rank+1];
2451 
2452   /* distribute row lengths to all processors */
2453   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2454   if (!rank) {
2455     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2456     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2457     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2458     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2459     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2460     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2461     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2462   } else {
2463     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2464   }
2465 
2466   if (!rank) {
2467     /* calculate the number of nonzeros on each processor */
2468     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2469     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2470     for (i=0; i<size; i++) {
2471       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2472         procsnz[i] += rowlengths[j];
2473       }
2474     }
2475     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2476 
2477     /* determine max buffer needed and allocate it */
2478     maxnz = 0;
2479     for (i=0; i<size; i++) {
2480       maxnz = PetscMax(maxnz,procsnz[i]);
2481     }
2482     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2483 
2484     /* read in my part of the matrix column indices  */
2485     nz     = procsnz[0];
2486     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2487     mycols = ibuf;
2488     if (size == 1)  nz -= extra_rows;
2489     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2490     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2491 
2492     /* read in the column indices for all the other processors (except the last) and ship them off */
2493     for (i=1; i<size-1; i++) {
2494       nz   = procsnz[i];
2495       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2496       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2497     }
2498     /* read in the stuff for the last proc */
2499     if (size != 1) {
2500       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2501       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2502       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2503       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2504     }
2505     ierr = PetscFree(cols);CHKERRQ(ierr);
2506   } else {
2507     /* determine buffer space needed for message */
2508     nz = 0;
2509     for (i=0; i<m; i++) {
2510       nz += locrowlens[i];
2511     }
2512     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2513     mycols = ibuf;
2514     /* receive message of column indices*/
2515     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2516     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2517     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2518   }
2519 
2520   /* loop over local rows, determining number of off diagonal entries */
2521   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2522   odlens   = dlens + (rend-rstart);
2523   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2524   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2525   masked1  = mask    + Mbs;
2526   masked2  = masked1 + Mbs;
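  /* mask[] marks block columns already counted in the current block row; masked1[] records the diagonal-portion block columns and masked2[] the off-diagonal ones so the marks can be cleared cheaply afterwards */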
2527   rowcount = 0; nzcount = 0;
2528   for (i=0; i<mbs; i++) {
2529     dcount  = 0;
2530     odcount = 0;
2531     for (j=0; j<bs; j++) {
2532       kmax = locrowlens[rowcount];
2533       for (k=0; k<kmax; k++) {
2534         tmp = mycols[nzcount++]/bs;
2535         if (!mask[tmp]) {
2536           mask[tmp] = 1;
2537           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2538           else masked1[dcount++] = tmp;
2539         }
2540       }
2541       rowcount++;
2542     }
2543 
2544     dlens[i]  = dcount;
2545     odlens[i] = odcount;
2546 
2547     /* zero out the mask elements we set */
2548     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2549     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2550   }
2551 
2552   /* create our matrix */
2553   ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2554   A = *newmat;
2555   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2556 
2557   if (!rank) {
2558     ierr = PetscMalloc(maxnz*sizeof(Scalar),&buf);CHKERRQ(ierr);
2559     /* read in my part of the matrix numerical values  */
2560     nz = procsnz[0];
2561     vals = buf;
2562     mycols = ibuf;
2563     if (size == 1)  nz -= extra_rows;
2564     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2565     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2566 
2567     /* insert into matrix */
2568     jj      = rstart*bs;
2569     for (i=0; i<m; i++) {
2570       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2571       mycols += locrowlens[i];
2572       vals   += locrowlens[i];
2573       jj++;
2574     }
2575     /* read in other processors (except the last one) and ship out */
2576     for (i=1; i<size-1; i++) {
2577       nz   = procsnz[i];
2578       vals = buf;
2579       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2580       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2581     }
2582     /* the last proc */
2583     if (size != 1) {
2584       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2585       vals = buf;
2586       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2587       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2588       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2589     }
2590     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2591   } else {
2592     /* receive numeric values */
2593     ierr = PetscMalloc(nz*sizeof(Scalar),&buf);CHKERRQ(ierr);
2594 
2595     /* receive message of values*/
2596     vals   = buf;
2597     mycols = ibuf;
2598     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2599     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2600     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2601 
2602     /* insert into matrix */
2603     jj      = rstart*bs;
2604     for (i=0; i<m; i++) {
2605       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2606       mycols += locrowlens[i];
2607       vals   += locrowlens[i];
2608       jj++;
2609     }
2610   }
2611   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2612   ierr = PetscFree(buf);CHKERRQ(ierr);
2613   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2614   ierr = PetscFree(rowners);CHKERRQ(ierr);
2615   ierr = PetscFree(dlens);CHKERRQ(ierr);
2616   ierr = PetscFree(mask);CHKERRQ(ierr);
2617   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2618   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2619   PetscFunctionReturn(0);
2620 }
2621 EXTERN_C_END
2622 
2623 #undef __FUNC__
2624 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2625 /*@
2626    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2627 
2628    Input Parameters:
2629 +  mat  - the matrix
2630 -  fact - factor
2631 
2632    Collective on Mat
2633 
2634    Level: advanced
2635 
2636   Notes:
2637    This can also be set by the command line option: -mat_use_hash_table fact
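
   For example, using the default factor of 1.39 (the fallback value used in
   MatCreate_MPIBAIJ), a sketch of the call is

.vb
      ierr = MatMPIBAIJSetHashTableFactor(mat,1.39);CHKERRQ(ierr);
.ve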
2638 
2639 .keywords: matrix, hashtable, factor, HT
2640 
2641 .seealso: MatSetOption()
2642 @*/
2643 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2644 {
2645   Mat_MPIBAIJ *baij;
2646   int         ierr;
2647   PetscTruth  flg;
2648 
2649   PetscFunctionBegin;
2650   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2651   ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr);
2652   if (!flg) {
2653     SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2654   }
2655   baij = (Mat_MPIBAIJ*)mat->data;
2656   baij->ht_fact = fact;
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 #undef __FUNC__
2661 #define __FUNC__ "MatMPIBAIJGetSeqBAIJ"
2662 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2663 {
2664   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2665   PetscFunctionBegin;
2666   *Ad     = a->A;
2667   *Ao     = a->B;
2668   *colmap = a->garray;
2669   PetscFunctionReturn(0);
2670 }
2671