xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 54750694bbef7523d49f1dcfdab14d6260091406)
1 /*$Id: mpibaij.c,v 1.190 2000/04/10 04:25:49 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 extern int MatSetUpMultiply_MPIBAIJ(Mat);
7 extern int DisAssemble_MPIBAIJ(Mat);
8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 extern int MatPrintHelp_SeqBAIJ(Mat);
16 extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22    converts the entries into single precision and then calls ..._MatScalar() to put them
23    into the single precision data structures.
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27 extern int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28 extern int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 #else
30 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ
31 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
33 #endif
34 
35 EXTERN_C_BEGIN
36 #undef __FUNC__
37 #define  __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ"
38 int MatStoreValues_MPIBAIJ(Mat mat)
39 {
40   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
41   int         ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
45   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 EXTERN_C_END
49 
50 EXTERN_C_BEGIN
51 #undef __FUNC__
52 #define  __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ"
53 int MatRetrieveValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
56   int         ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
60   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 /*
66      Local utility routine that creates a mapping from the global column
67    number to the local number in the off-diagonal part of the local
68    storage of the matrix.  This is done in a non scable way since the
69    length of colmap equals the global matrix length.
70 */
71 #undef __FUNC__
72 #define  __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private"
73 static int CreateColmap_MPIBAIJ_Private(Mat mat)
74 {
75   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
76   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
77   int         nbs = B->nbs,i,bs=B->bs,ierr;
78 
79   PetscFunctionBegin;
80 #if defined (PETSC_USE_CTABLE)
81   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
82   for (i=0; i<nbs; i++){
83     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
84   }
85 #else
86   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
87   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
88   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
89   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
90 #endif
91   PetscFunctionReturn(0);
92 }
93 
94 #define CHUNKSIZE  10
95 
96 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
97 { \
98  \
99     brow = row/bs;  \
100     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
101     rmax = aimax[brow]; nrow = ailen[brow]; \
102       bcol = col/bs; \
103       ridx = row % bs; cidx = col % bs; \
104       low = 0; high = nrow; \
105       while (high-low > 3) { \
106         t = (low+high)/2; \
107         if (rp[t] > bcol) high = t; \
108         else              low  = t; \
109       } \
110       for (_i=low; _i<high; _i++) { \
111         if (rp[_i] > bcol) break; \
112         if (rp[_i] == bcol) { \
113           bap  = ap +  bs2*_i + bs*cidx + ridx; \
114           if (addv == ADD_VALUES) *bap += value;  \
115           else                    *bap  = value;  \
116           goto a_noinsert; \
117         } \
118       } \
119       if (a->nonew == 1) goto a_noinsert; \
120       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
121       if (nrow >= rmax) { \
122         /* there is no extra room in row, therefore enlarge */ \
123         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
124         MatScalar *new_a; \
125  \
126         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
127  \
128         /* malloc new storage space */ \
129         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
130         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
131         new_j   = (int*)(new_a + bs2*new_nz); \
132         new_i   = new_j + new_nz; \
133  \
134         /* copy over old data into new slots */ \
135         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
136         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
137         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
138         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
139         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
140         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
142         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
143                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
144         /* free up old matrix storage */ \
145         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
146         if (!a->singlemalloc) { \
147           ierr = PetscFree(a->i);CHKERRQ(ierr); \
148           ierr = PetscFree(a->j);CHKERRQ(ierr);\
149         } \
150         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
151         a->singlemalloc = PETSC_TRUE; \
152  \
153         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
154         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
155         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
156         a->maxnz += bs2*CHUNKSIZE; \
157         a->reallocs++; \
158         a->nz++; \
159       } \
160       N = nrow++ - 1;  \
161       /* shift up all the later entries in this row */ \
162       for (ii=N; ii>=_i; ii--) { \
163         rp[ii+1] = rp[ii]; \
164         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
165       } \
166       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
167       rp[_i]                      = bcol;  \
168       ap[bs2*_i + bs*cidx + ridx] = value;  \
169       a_noinsert:; \
170     ailen[brow] = nrow; \
171 }
172 
173 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
174 { \
175     brow = row/bs;  \
176     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
177     rmax = bimax[brow]; nrow = bilen[brow]; \
178       bcol = col/bs; \
179       ridx = row % bs; cidx = col % bs; \
180       low = 0; high = nrow; \
181       while (high-low > 3) { \
182         t = (low+high)/2; \
183         if (rp[t] > bcol) high = t; \
184         else              low  = t; \
185       } \
186       for (_i=low; _i<high; _i++) { \
187         if (rp[_i] > bcol) break; \
188         if (rp[_i] == bcol) { \
189           bap  = ap +  bs2*_i + bs*cidx + ridx; \
190           if (addv == ADD_VALUES) *bap += value;  \
191           else                    *bap  = value;  \
192           goto b_noinsert; \
193         } \
194       } \
195       if (b->nonew == 1) goto b_noinsert; \
196       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
197       if (nrow >= rmax) { \
198         /* there is no extra room in row, therefore enlarge */ \
199         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
200         MatScalar *new_a; \
201  \
202         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
203  \
204         /* malloc new storage space */ \
205         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
206         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
207         new_j   = (int*)(new_a + bs2*new_nz); \
208         new_i   = new_j + new_nz; \
209  \
210         /* copy over old data into new slots */ \
211         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
212         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
213         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
214         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
215         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
216         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
217         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
218         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
219                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
220         /* free up old matrix storage */ \
221         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
222         if (!b->singlemalloc) { \
223           ierr = PetscFree(b->i);CHKERRQ(ierr); \
224           ierr = PetscFree(b->j);CHKERRQ(ierr); \
225         } \
226         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
227         b->singlemalloc = PETSC_TRUE; \
228  \
229         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
230         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
231         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
232         b->maxnz += bs2*CHUNKSIZE; \
233         b->reallocs++; \
234         b->nz++; \
235       } \
236       N = nrow++ - 1;  \
237       /* shift up all the later entries in this row */ \
238       for (ii=N; ii>=_i; ii--) { \
239         rp[ii+1] = rp[ii]; \
240         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
241       } \
242       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
243       rp[_i]                      = bcol;  \
244       ap[bs2*_i + bs*cidx + ridx] = value;  \
245       b_noinsert:; \
246     bilen[brow] = nrow; \
247 }
248 
249 #if defined(PETSC_USE_MAT_SINGLE)
250 #undef __FUNC__
251 #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
252 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
253 {
254   int       ierr,i,N = m*n;
255   MatScalar *vsingle = (MatScalar*)v;
256 
257   PetscFunctionBegin;
258   for (i=0; i<N; i++) {
259     vsingle[i] = v[i];
260   }
261   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
262   PetscFunctionReturn(0);
263 }
264 
265 #undef __FUNC__
266 #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
267 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
268 {
269   int       ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2;
270   MatScalar *vsingle = (MatScalar*)v;
271 
272   PetscFunctionBegin;
273   for (i=0; i<N; i++) {
274     vsingle[i] = v[i];
275   }
276   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277   PetscFunctionReturn(0);
278 }
279 #endif
280 
281 #undef __FUNC__
282 #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
283 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
284 {
285   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
286   MatScalar   value;
287   int         ierr,i,j,row,col;
288   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
289   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
290   int         cend_orig=baij->cend_bs,bs=baij->bs;
291 
292   /* Some Variables required in the macro */
293   Mat         A = baij->A;
294   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
295   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
296   MatScalar   *aa=a->a;
297 
298   Mat         B = baij->B;
299   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
300   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
301   MatScalar   *ba=b->a;
302 
303   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
304   int         low,high,t,ridx,cidx,bs2=a->bs2;
305   MatScalar   *ap,*bap;
306 
307   PetscFunctionBegin;
308   for (i=0; i<m; i++) {
309     if (im[i] < 0) continue;
310 #if defined(PETSC_USE_BOPT_g)
311     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
312 #endif
313     if (im[i] >= rstart_orig && im[i] < rend_orig) {
314       row = im[i] - rstart_orig;
315       for (j=0; j<n; j++) {
316         if (in[j] >= cstart_orig && in[j] < cend_orig){
317           col = in[j] - cstart_orig;
318           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
319           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
320           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
321         } else if (in[j] < 0) continue;
322 #if defined(PETSC_USE_BOPT_g)
323         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
324 #endif
325         else {
326           if (mat->was_assembled) {
327             if (!baij->colmap) {
328               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
329             }
330 #if defined (PETSC_USE_CTABLE)
331             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
332             col  = col - 1 + in[j]%bs;
333 #else
334             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
335 #endif
336             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
337               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
338               col =  in[j];
339               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
340               B = baij->B;
341               b = (Mat_SeqBAIJ*)(B)->data;
342               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
343               ba=b->a;
344             }
345           } else col = in[j];
346           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
347           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
348           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
349         }
350       }
351     } else {
352       if (!baij->donotstash) {
353         if (roworiented) {
354           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
355         } else {
356           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
357         }
358       }
359     }
360   }
361   PetscFunctionReturn(0);
362 }
363 
364 #undef __FUNC__
365 #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
366 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
367 {
368   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
369   MatScalar   *value,*barray=baij->barray;
370   int         ierr,i,j,ii,jj,row,col;
371   int         roworiented = baij->roworiented,rstart=baij->rstart ;
372   int         rend=baij->rend,cstart=baij->cstart,stepval;
373   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
374 
375   PetscFunctionBegin;
376   if(!barray) {
377     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
378   }
379 
380   if (roworiented) {
381     stepval = (n-1)*bs;
382   } else {
383     stepval = (m-1)*bs;
384   }
385   for (i=0; i<m; i++) {
386     if (im[i] < 0) continue;
387 #if defined(PETSC_USE_BOPT_g)
388     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
389 #endif
390     if (im[i] >= rstart && im[i] < rend) {
391       row = im[i] - rstart;
392       for (j=0; j<n; j++) {
393         /* If NumCol = 1 then a copy is not required */
394         if ((roworiented) && (n == 1)) {
395           barray = v + i*bs2;
396         } else if((!roworiented) && (m == 1)) {
397           barray = v + j*bs2;
398         } else { /* Here a copy is required */
399           if (roworiented) {
400             value = v + i*(stepval+bs)*bs + j*bs;
401           } else {
402             value = v + j*(stepval+bs)*bs + i*bs;
403           }
404           for (ii=0; ii<bs; ii++,value+=stepval) {
405             for (jj=0; jj<bs; jj++) {
406               *barray++  = *value++;
407             }
408           }
409           barray -=bs2;
410         }
411 
412         if (in[j] >= cstart && in[j] < cend){
413           col  = in[j] - cstart;
414           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
415         }
416         else if (in[j] < 0) continue;
417 #if defined(PETSC_USE_BOPT_g)
418         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
419 #endif
420         else {
421           if (mat->was_assembled) {
422             if (!baij->colmap) {
423               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
424             }
425 
426 #if defined(PETSC_USE_BOPT_g)
427 #if defined (PETSC_USE_CTABLE)
428             { int data;
429               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
430               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
431             }
432 #else
433             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
434 #endif
435 #endif
436 #if defined (PETSC_USE_CTABLE)
437 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
438             col  = (col - 1)/bs;
439 #else
440             col = (baij->colmap[in[j]] - 1)/bs;
441 #endif
442             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
443               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
444               col =  in[j];
445             }
446           }
447           else col = in[j];
448           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
449         }
450       }
451     } else {
452       if (!baij->donotstash) {
453         if (roworiented) {
454           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
455         } else {
456           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
457         }
458       }
459     }
460   }
461   PetscFunctionReturn(0);
462 }
463 #define HASH_KEY 0.6180339887
464 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
465 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
466 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
467 #undef __FUNC__
468 #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT"
469 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
470 {
471   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
472   int         ierr,i,j,row,col;
473   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
474   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
475   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
476   PetscReal   tmp;
477   MatScalar   ** HD = baij->hd,value;
478 #if defined(PETSC_USE_BOPT_g)
479   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
480 #endif
481 
482   PetscFunctionBegin;
483 
484   for (i=0; i<m; i++) {
485 #if defined(PETSC_USE_BOPT_g)
486     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
487     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
488 #endif
489       row = im[i];
490     if (row >= rstart_orig && row < rend_orig) {
491       for (j=0; j<n; j++) {
492         col = in[j];
493         if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m];
494         /* Look up into the Hash Table */
495         key = (row/bs)*Nbs+(col/bs)+1;
496         h1  = HASH(size,key,tmp);
497 
498 
499         idx = h1;
500 #if defined(PETSC_USE_BOPT_g)
501         insert_ct++;
502         total_ct++;
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
507             if (idx == h1) {
508               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
509             }
510           }
511         }
512 #else
513         if (HT[idx] != key) {
514           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
515           if (idx == size) {
516             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
517             if (idx == h1) {
518               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
519             }
520           }
521         }
522 #endif
523         /* A HASH table entry is found, so insert the values at the correct address */
524         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
525         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
526       }
527     } else {
528       if (!baij->donotstash) {
529         if (roworiented) {
530           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
531         } else {
532           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
533         }
534       }
535     }
536   }
537 #if defined(PETSC_USE_BOPT_g)
538   baij->ht_total_ct = total_ct;
539   baij->ht_insert_ct = insert_ct;
540 #endif
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNC__
545 #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
546 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
547 {
548   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
549   int         ierr,i,j,ii,jj,row,col;
550   int         roworiented = baij->roworiented,rstart=baij->rstart ;
551   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
552   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
553   PetscReal   tmp;
554   MatScalar   **HD = baij->hd,*baij_a;
555   Scalar      *v_t,*value;
556 #if defined(PETSC_USE_BOPT_g)
557   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
558 #endif
559 
560   PetscFunctionBegin;
561 
562   if (roworiented) {
563     stepval = (n-1)*bs;
564   } else {
565     stepval = (m-1)*bs;
566   }
567   for (i=0; i<m; i++) {
568 #if defined(PETSC_USE_BOPT_g)
569     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
570     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
571 #endif
572     row   = im[i];
573     v_t   = v + i*bs2;
574     if (row >= rstart && row < rend) {
575       for (j=0; j<n; j++) {
576         col = in[j];
577 
578         /* Look up into the Hash Table */
579         key = row*Nbs+col+1;
580         h1  = HASH(size,key,tmp);
581 
582         idx = h1;
583 #if defined(PETSC_USE_BOPT_g)
584         total_ct++;
585         insert_ct++;
586        if (HT[idx] != key) {
587           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
588           if (idx == size) {
589             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
590             if (idx == h1) {
591               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
592             }
593           }
594         }
595 #else
596         if (HT[idx] != key) {
597           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
598           if (idx == size) {
599             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
600             if (idx == h1) {
601               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
602             }
603           }
604         }
605 #endif
606         baij_a = HD[idx];
607         if (roworiented) {
608           /*value = v + i*(stepval+bs)*bs + j*bs;*/
609           /* value = v + (i*(stepval+bs)+j)*bs; */
610           value = v_t;
611           v_t  += bs;
612           if (addv == ADD_VALUES) {
613             for (ii=0; ii<bs; ii++,value+=stepval) {
614               for (jj=ii; jj<bs2; jj+=bs) {
615                 baij_a[jj]  += *value++;
616               }
617             }
618           } else {
619             for (ii=0; ii<bs; ii++,value+=stepval) {
620               for (jj=ii; jj<bs2; jj+=bs) {
621                 baij_a[jj]  = *value++;
622               }
623             }
624           }
625         } else {
626           value = v + j*(stepval+bs)*bs + i*bs;
627           if (addv == ADD_VALUES) {
628             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
629               for (jj=0; jj<bs; jj++) {
630                 baij_a[jj]  += *value++;
631               }
632             }
633           } else {
634             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
635               for (jj=0; jj<bs; jj++) {
636                 baij_a[jj]  = *value++;
637               }
638             }
639           }
640         }
641       }
642     } else {
643       if (!baij->donotstash) {
644         if (roworiented) {
645           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
646         } else {
647           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
648         }
649       }
650     }
651   }
652 #if defined(PETSC_USE_BOPT_g)
653   baij->ht_total_ct = total_ct;
654   baij->ht_insert_ct = insert_ct;
655 #endif
656   PetscFunctionReturn(0);
657 }
658 
659 #undef __FUNC__
660 #define  __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
661 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
662 {
663   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
664   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
665   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
666 
667   PetscFunctionBegin;
668   for (i=0; i<m; i++) {
669     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
670     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
671     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
672       row = idxm[i] - bsrstart;
673       for (j=0; j<n; j++) {
674         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
675         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
676         if (idxn[j] >= bscstart && idxn[j] < bscend){
677           col = idxn[j] - bscstart;
678           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
679         } else {
680           if (!baij->colmap) {
681             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
682           }
683 #if defined (PETSC_USE_CTABLE)
684           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
685           data --;
686 #else
687           data = baij->colmap[idxn[j]/bs]-1;
688 #endif
689           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
690           else {
691             col  = data + idxn[j]%bs;
692             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
693           }
694         }
695       }
696     } else {
697       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
698     }
699   }
700  PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNC__
704 #define  __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
705 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
706 {
707   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
708   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
709   int        ierr,i,bs2=baij->bs2;
710   PetscReal  sum = 0.0;
711   MatScalar  *v;
712 
713   PetscFunctionBegin;
714   if (baij->size == 1) {
715     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
716   } else {
717     if (type == NORM_FROBENIUS) {
718       v = amat->a;
719       for (i=0; i<amat->nz*bs2; i++) {
720 #if defined(PETSC_USE_COMPLEX)
721         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
722 #else
723         sum += (*v)*(*v); v++;
724 #endif
725       }
726       v = bmat->a;
727       for (i=0; i<bmat->nz*bs2; i++) {
728 #if defined(PETSC_USE_COMPLEX)
729         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
730 #else
731         sum += (*v)*(*v); v++;
732 #endif
733       }
734       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
735       *norm = sqrt(*norm);
736     } else {
737       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 
744 /*
745   Creates the hash table, and sets the table
746   This table is created only once.
747   If new entried need to be added to the matrix
748   then the hash table has to be destroyed and
749   recreated.
750 */
751 #undef __FUNC__
752 #define  __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
753 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
754 {
755   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
756   Mat         A = baij->A,B=baij->B;
757   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
758   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
759   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
760   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
761   int         *HT,key;
762   MatScalar   **HD;
763   PetscReal   tmp;
764 #if defined(PETSC_USE_BOPT_g)
765   int         ct=0,max=0;
766 #endif
767 
768   PetscFunctionBegin;
769   baij->ht_size=(int)(factor*nz);
770   size = baij->ht_size;
771 
772   if (baij->ht) {
773     PetscFunctionReturn(0);
774   }
775 
776   /* Allocate Memory for Hash Table */
777   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
778   baij->ht = (int*)(baij->hd + size);
779   HD = baij->hd;
780   HT = baij->ht;
781 
782 
783   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
784 
785 
786   /* Loop Over A */
787   for (i=0; i<a->mbs; i++) {
788     for (j=ai[i]; j<ai[i+1]; j++) {
789       row = i+rstart;
790       col = aj[j]+cstart;
791 
792       key = row*Nbs + col + 1;
793       h1  = HASH(size,key,tmp);
794       for (k=0; k<size; k++){
795         if (HT[(h1+k)%size] == 0.0) {
796           HT[(h1+k)%size] = key;
797           HD[(h1+k)%size] = a->a + j*bs2;
798           break;
799 #if defined(PETSC_USE_BOPT_g)
800         } else {
801           ct++;
802 #endif
803         }
804       }
805 #if defined(PETSC_USE_BOPT_g)
806       if (k> max) max = k;
807 #endif
808     }
809   }
810   /* Loop Over B */
811   for (i=0; i<b->mbs; i++) {
812     for (j=bi[i]; j<bi[i+1]; j++) {
813       row = i+rstart;
814       col = garray[bj[j]];
815       key = row*Nbs + col + 1;
816       h1  = HASH(size,key,tmp);
817       for (k=0; k<size; k++){
818         if (HT[(h1+k)%size] == 0.0) {
819           HT[(h1+k)%size] = key;
820           HD[(h1+k)%size] = b->a + j*bs2;
821           break;
822 #if defined(PETSC_USE_BOPT_g)
823         } else {
824           ct++;
825 #endif
826         }
827       }
828 #if defined(PETSC_USE_BOPT_g)
829       if (k> max) max = k;
830 #endif
831     }
832   }
833 
834   /* Print Summary */
835 #if defined(PETSC_USE_BOPT_g)
836   for (i=0,j=0; i<size; i++) {
837     if (HT[i]) {j++;}
838   }
839   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
840 #endif
841   PetscFunctionReturn(0);
842 }
843 
844 #undef __FUNC__
845 #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
846 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847 {
848   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
849   int         ierr,nstash,reallocs;
850   InsertMode  addv;
851 
852   PetscFunctionBegin;
853   if (baij->donotstash) {
854     PetscFunctionReturn(0);
855   }
856 
857   /* make sure all processors are either in INSERTMODE or ADDMODE */
858   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
859   if (addv == (ADD_VALUES|INSERT_VALUES)) {
860     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
861   }
862   mat->insertmode = addv; /* in case this processor had no cache */
863 
864   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
865   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
866   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
867   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
868   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
869   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNC__
874 #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
875 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
876 {
877   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
878   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
879   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
880   int         *row,*col,other_disassembled;
881   PetscTruth  r1,r2,r3;
882   MatScalar   *val;
883   InsertMode  addv = mat->insertmode;
884 
885   PetscFunctionBegin;
886   if (!baij->donotstash) {
887     while (1) {
888       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
889       if (!flg) break;
890 
891       for (i=0; i<n;) {
892         /* Now identify the consecutive vals belonging to the same row */
893         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
894         if (j < n) ncols = j-i;
895         else       ncols = n-i;
896         /* Now assemble all these values with a single function call */
897         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
898         i = j;
899       }
900     }
901     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
902     /* Now process the block-stash. Since the values are stashed column-oriented,
903        set the roworiented flag to column oriented, and after MatSetValues()
904        restore the original flags */
905     r1 = baij->roworiented;
906     r2 = a->roworiented;
907     r3 = b->roworiented;
908     baij->roworiented = PETSC_FALSE;
909     a->roworiented    = PETSC_FALSE;
910     b->roworiented    = PETSC_FALSE;
911     while (1) {
912       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
913       if (!flg) break;
914 
915       for (i=0; i<n;) {
916         /* Now identify the consecutive vals belonging to the same row */
917         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
918         if (j < n) ncols = j-i;
919         else       ncols = n-i;
920         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
921         i = j;
922       }
923     }
924     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
925     baij->roworiented = r1;
926     a->roworiented    = r2;
927     b->roworiented    = r3;
928   }
929 
930   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
932 
933   /* determine if any processor has disassembled, if so we must
934      also disassemble ourselfs, in order that we may reassemble. */
935   /*
936      if nonzero structure of submatrix B cannot change then we know that
937      no processor disassembled thus we can skip this stuff
938   */
939   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
940     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
941     if (mat->was_assembled && !other_disassembled) {
942       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
943     }
944   }
945 
946   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
947     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
948   }
949   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
950   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
951 
952 #if defined(PETSC_USE_BOPT_g)
953   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
954     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
955     baij->ht_total_ct  = 0;
956     baij->ht_insert_ct = 0;
957   }
958 #endif
959   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
960     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
961     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
962     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
963   }
964 
965   if (baij->rowvalues) {
966     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
967     baij->rowvalues = 0;
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNC__
973 #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
974 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
975 {
976   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
977   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
978   PetscTruth   isascii,isdraw;
979 
980   PetscFunctionBegin;
981   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
982   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
983   if (isascii) {
984     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
985     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
986       MatInfo info;
987       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
988       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
989       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
990               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
991               baij->bs,(int)info.memory);CHKERRQ(ierr);
992       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
993       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
994       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
995       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
996       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
997       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
998       PetscFunctionReturn(0);
999     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1000       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1001       PetscFunctionReturn(0);
1002     }
1003   }
1004 
1005   if (isdraw) {
1006     Draw       draw;
1007     PetscTruth isnull;
1008     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1009     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1010   }
1011 
1012   if (size == 1) {
1013     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1014   } else {
1015     /* assemble the entire matrix onto first processor. */
1016     Mat         A;
1017     Mat_SeqBAIJ *Aloc;
1018     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1019     MatScalar   *a;
1020 
1021     if (!rank) {
1022       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1023     } else {
1024       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1025     }
1026     PLogObjectParent(mat,A);
1027 
1028     /* copy over the A part */
1029     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
1030     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1031     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1032 
1033     for (i=0; i<mbs; i++) {
1034       rvals[0] = bs*(baij->rstart + i);
1035       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1036       for (j=ai[i]; j<ai[i+1]; j++) {
1037         col = (baij->cstart+aj[j])*bs;
1038         for (k=0; k<bs; k++) {
1039           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1040           col++; a += bs;
1041         }
1042       }
1043     }
1044     /* copy over the B part */
1045     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1046     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1047     for (i=0; i<mbs; i++) {
1048       rvals[0] = bs*(baij->rstart + i);
1049       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1050       for (j=ai[i]; j<ai[i+1]; j++) {
1051         col = baij->garray[aj[j]]*bs;
1052         for (k=0; k<bs; k++) {
1053           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1054           col++; a += bs;
1055         }
1056       }
1057     }
1058     ierr = PetscFree(rvals);CHKERRQ(ierr);
1059     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1060     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061     /*
1062        Everyone has to call to draw the matrix since the graphics waits are
1063        synchronized across all processors that share the Draw object
1064     */
1065     if (!rank) {
1066       Viewer sviewer;
1067       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1068       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1069       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1070     }
1071     ierr = MatDestroy(A);CHKERRQ(ierr);
1072   }
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNC__
1077 #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1078 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1079 {
1080   int        ierr;
1081   PetscTruth isascii,isdraw,issocket,isbinary;
1082 
1083   PetscFunctionBegin;
1084   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1085   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1087   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1088   if (isascii || isdraw || issocket || isbinary) {
1089     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1090   } else {
1091     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNC__
1097 #define  __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
1098 int MatDestroy_MPIBAIJ(Mat mat)
1099 {
1100   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1101   int         ierr;
1102 
1103   PetscFunctionBegin;
1104 
1105   if (mat->mapping) {
1106     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1107   }
1108   if (mat->bmapping) {
1109     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1110   }
1111   if (mat->rmap) {
1112     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1113   }
1114   if (mat->cmap) {
1115     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1116   }
1117 #if defined(PETSC_USE_LOG)
1118   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1119 #endif
1120 
1121   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1122   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1123 
1124   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1125   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1126   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1127 #if defined (PETSC_USE_CTABLE)
1128   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1129 #else
1130   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1131 #endif
1132   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1133   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1134   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1135   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1136   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1137   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1138   ierr = PetscFree(baij);CHKERRQ(ierr);
1139   PLogObjectDestroy(mat);
1140   PetscHeaderDestroy(mat);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNC__
1145 #define  __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1146 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1147 {
1148   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1149   int         ierr,nt;
1150 
1151   PetscFunctionBegin;
1152   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1153   if (nt != a->n) {
1154     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1155   }
1156   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1157   if (nt != a->m) {
1158     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1159   }
1160   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1161   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1162   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1163   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1164   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNC__
1169 #define  __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1170 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1171 {
1172   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1173   int        ierr;
1174 
1175   PetscFunctionBegin;
1176   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1177   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1178   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1179   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNC__
1184 #define  __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
1185 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1186 {
1187   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1188   int         ierr;
1189 
1190   PetscFunctionBegin;
1191   /* do nondiagonal part */
1192   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1193   /* send it on its way */
1194   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1195   /* do local part */
1196   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1197   /* receive remote parts: note this assumes the values are not actually */
1198   /* inserted in yy until the next line, which is true for my implementation*/
1199   /* but is not perhaps always true. */
1200   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 #undef __FUNC__
1205 #define  __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
1206 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1207 {
1208   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1209   int         ierr;
1210 
1211   PetscFunctionBegin;
1212   /* do nondiagonal part */
1213   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1214   /* send it on its way */
1215   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1216   /* do local part */
1217   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1218   /* receive remote parts: note this assumes the values are not actually */
1219   /* inserted in yy until the next line, which is true for my implementation*/
1220   /* but is not perhaps always true. */
1221   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 /*
1226   This only works correctly for square matrices where the subblock A->A is the
1227    diagonal block
1228 */
1229 #undef __FUNC__
1230 #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1231 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1232 {
1233   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1234   int         ierr;
1235 
1236   PetscFunctionBegin;
1237   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1238   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNC__
1243 #define  __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1244 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1245 {
1246   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1247   int         ierr;
1248 
1249   PetscFunctionBegin;
1250   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1251   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNC__
1256 #define  __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ"
1257 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1258 {
1259   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1260 
1261   PetscFunctionBegin;
1262   if (m) *m = mat->M;
1263   if (n) *n = mat->N;
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNC__
1268 #define  __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
1269 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1270 {
1271   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1272 
1273   PetscFunctionBegin;
1274   *m = mat->m; *n = mat->n;
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNC__
1279 #define  __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1280 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1281 {
1282   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1283 
1284   PetscFunctionBegin;
1285   if (m) *m = mat->rstart*mat->bs;
1286   if (n) *n = mat->rend*mat->bs;
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNC__
1291 #define  __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
1292 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1293 {
1294   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1295   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1296   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1297   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1298   int        *cmap,*idx_p,cstart = mat->cstart;
1299 
1300   PetscFunctionBegin;
1301   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1302   mat->getrowactive = PETSC_TRUE;
1303 
1304   if (!mat->rowvalues && (idx || v)) {
1305     /*
1306         allocate enough space to hold information from the longest row.
1307     */
1308     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1309     int     max = 1,mbs = mat->mbs,tmp;
1310     for (i=0; i<mbs; i++) {
1311       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1312       if (max < tmp) { max = tmp; }
1313     }
1314     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1315     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1316   }
1317 
1318   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1319   lrow = row - brstart;
1320 
1321   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1322   if (!v)   {pvA = 0; pvB = 0;}
1323   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1324   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1325   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1326   nztot = nzA + nzB;
1327 
1328   cmap  = mat->garray;
1329   if (v  || idx) {
1330     if (nztot) {
1331       /* Sort by increasing column numbers, assuming A and B already sorted */
1332       int imark = -1;
1333       if (v) {
1334         *v = v_p = mat->rowvalues;
1335         for (i=0; i<nzB; i++) {
1336           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1337           else break;
1338         }
1339         imark = i;
1340         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1341         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1342       }
1343       if (idx) {
1344         *idx = idx_p = mat->rowindices;
1345         if (imark > -1) {
1346           for (i=0; i<imark; i++) {
1347             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1348           }
1349         } else {
1350           for (i=0; i<nzB; i++) {
1351             if (cmap[cworkB[i]/bs] < cstart)
1352               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1353             else break;
1354           }
1355           imark = i;
1356         }
1357         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1358         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1359       }
1360     } else {
1361       if (idx) *idx = 0;
1362       if (v)   *v   = 0;
1363     }
1364   }
1365   *nz = nztot;
1366   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1367   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNC__
1372 #define  __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1373 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1374 {
1375   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1376 
1377   PetscFunctionBegin;
1378   if (baij->getrowactive == PETSC_FALSE) {
1379     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1380   }
1381   baij->getrowactive = PETSC_FALSE;
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNC__
1386 #define  __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1387 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1388 {
1389   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1390 
1391   PetscFunctionBegin;
1392   *bs = baij->bs;
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNC__
1397 #define  __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1398 int MatZeroEntries_MPIBAIJ(Mat A)
1399 {
1400   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1401   int         ierr;
1402 
1403   PetscFunctionBegin;
1404   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1405   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNC__
1410 #define  __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1411 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1412 {
1413   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1414   Mat         A = a->A,B = a->B;
1415   int         ierr;
1416   PetscReal   isend[5],irecv[5];
1417 
1418   PetscFunctionBegin;
1419   info->block_size     = (double)a->bs;
1420   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1421   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1422   isend[3] = info->memory;  isend[4] = info->mallocs;
1423   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1424   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1425   isend[3] += info->memory;  isend[4] += info->mallocs;
1426   if (flag == MAT_LOCAL) {
1427     info->nz_used      = isend[0];
1428     info->nz_allocated = isend[1];
1429     info->nz_unneeded  = isend[2];
1430     info->memory       = isend[3];
1431     info->mallocs      = isend[4];
1432   } else if (flag == MAT_GLOBAL_MAX) {
1433     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1434     info->nz_used      = irecv[0];
1435     info->nz_allocated = irecv[1];
1436     info->nz_unneeded  = irecv[2];
1437     info->memory       = irecv[3];
1438     info->mallocs      = irecv[4];
1439   } else if (flag == MAT_GLOBAL_SUM) {
1440     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1441     info->nz_used      = irecv[0];
1442     info->nz_allocated = irecv[1];
1443     info->nz_unneeded  = irecv[2];
1444     info->memory       = irecv[3];
1445     info->mallocs      = irecv[4];
1446   } else {
1447     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1448   }
1449   info->rows_global       = (double)a->M;
1450   info->columns_global    = (double)a->N;
1451   info->rows_local        = (double)a->m;
1452   info->columns_local     = (double)a->N;
1453   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1454   info->fill_ratio_needed = 0;
1455   info->factor_mallocs    = 0;
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNC__
1460 #define  __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1461 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1462 {
1463   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1464   int         ierr;
1465 
1466   PetscFunctionBegin;
1467   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1468       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1469       op == MAT_COLUMNS_UNSORTED ||
1470       op == MAT_COLUMNS_SORTED ||
1471       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1472       op == MAT_KEEP_ZEROED_ROWS ||
1473       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1474         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1475         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1476   } else if (op == MAT_ROW_ORIENTED) {
1477         a->roworiented = PETSC_TRUE;
1478         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1479         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1480   } else if (op == MAT_ROWS_SORTED ||
1481              op == MAT_ROWS_UNSORTED ||
1482              op == MAT_SYMMETRIC ||
1483              op == MAT_STRUCTURALLY_SYMMETRIC ||
1484              op == MAT_YES_NEW_DIAGONALS ||
1485              op == MAT_USE_HASH_TABLE) {
1486     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1487   } else if (op == MAT_COLUMN_ORIENTED) {
1488     a->roworiented = PETSC_FALSE;
1489     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1490     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1491   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1492     a->donotstash = PETSC_TRUE;
1493   } else if (op == MAT_NO_NEW_DIAGONALS) {
1494     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1495   } else if (op == MAT_USE_HASH_TABLE) {
1496     a->ht_flag = PETSC_TRUE;
1497   } else {
1498     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNC__
1504 #define  __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1505 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1506 {
1507   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1508   Mat_SeqBAIJ *Aloc;
1509   Mat         B;
1510   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1511   int         bs=baij->bs,mbs=baij->mbs;
1512   MatScalar   *a;
1513 
1514   PetscFunctionBegin;
1515   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1516   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1517 
1518   /* copy over the A part */
1519   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1520   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1521   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1522 
1523   for (i=0; i<mbs; i++) {
1524     rvals[0] = bs*(baij->rstart + i);
1525     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1526     for (j=ai[i]; j<ai[i+1]; j++) {
1527       col = (baij->cstart+aj[j])*bs;
1528       for (k=0; k<bs; k++) {
1529         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1530         col++; a += bs;
1531       }
1532     }
1533   }
1534   /* copy over the B part */
1535   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1536   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1537   for (i=0; i<mbs; i++) {
1538     rvals[0] = bs*(baij->rstart + i);
1539     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1540     for (j=ai[i]; j<ai[i+1]; j++) {
1541       col = baij->garray[aj[j]]*bs;
1542       for (k=0; k<bs; k++) {
1543         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1544         col++; a += bs;
1545       }
1546     }
1547   }
1548   ierr = PetscFree(rvals);CHKERRQ(ierr);
1549   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1550   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1551 
1552   if (matout) {
1553     *matout = B;
1554   } else {
1555     PetscOps *Abops;
1556     MatOps   Aops;
1557 
1558     /* This isn't really an in-place transpose .... but free data structures from baij */
1559     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1560     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1561     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1562 #if defined (PETSC_USE_CTABLE)
1563     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1564 #else
1565     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1566 #endif
1567     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1568     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1569     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1570     ierr = PetscFree(baij);CHKERRQ(ierr);
1571 
1572     /*
1573        This is horrible, horrible code. We need to keep the
1574       A pointers for the bops and ops but copy everything
1575       else from C.
1576     */
1577     Abops   = A->bops;
1578     Aops    = A->ops;
1579     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1580     A->bops = Abops;
1581     A->ops  = Aops;
1582 
1583     PetscHeaderDestroy(B);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNC__
1589 #define  __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
1590 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1591 {
1592   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1593   Mat         a = baij->A,b = baij->B;
1594   int         ierr,s1,s2,s3;
1595 
1596   PetscFunctionBegin;
1597   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1598   if (rr) {
1599     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1600     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1601     /* Overlap communication with computation. */
1602     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1603   }
1604   if (ll) {
1605     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1606     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1607     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1608   }
1609   /* scale  the diagonal block */
1610   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1611 
1612   if (rr) {
1613     /* Do a scatter end and then right scale the off-diagonal block */
1614     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1615     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1616   }
1617 
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 #undef __FUNC__
1622 #define  __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
1623 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1624 {
1625   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1626   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1627   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1628   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1629   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1630   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1631   MPI_Comm       comm = A->comm;
1632   MPI_Request    *send_waits,*recv_waits;
1633   MPI_Status     recv_status,*send_status;
1634   IS             istmp;
1635 
1636   PetscFunctionBegin;
1637   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1638   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1639 
1640   /*  first count number of contributors to each processor */
1641   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1642   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1643   procs  = nprocs + size;
1644   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1645   for (i=0; i<N; i++) {
1646     idx   = rows[i];
1647     found = 0;
1648     for (j=0; j<size; j++) {
1649       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1650         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1651       }
1652     }
1653     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1654   }
1655   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1656 
1657   /* inform other processors of number of messages and max length*/
1658   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1659   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1660   nmax   = work[rank];
1661   nrecvs = work[size+rank];
1662   ierr = PetscFree(work);CHKERRQ(ierr);
1663 
1664   /* post receives:   */
1665   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1666   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1667   for (i=0; i<nrecvs; i++) {
1668     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1669   }
1670 
1671   /* do sends:
1672      1) starts[i] gives the starting index in svalues for stuff going to
1673      the ith processor
1674   */
1675   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1676   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1677   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1678   starts[0]  = 0;
1679   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1680   for (i=0; i<N; i++) {
1681     svalues[starts[owner[i]]++] = rows[i];
1682   }
1683   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1684 
1685   starts[0] = 0;
1686   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1687   count = 0;
1688   for (i=0; i<size; i++) {
1689     if (procs[i]) {
1690       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1691     }
1692   }
1693   ierr = PetscFree(starts);CHKERRQ(ierr);
1694 
1695   base = owners[rank]*bs;
1696 
1697   /*  wait on receives */
1698   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1699   source = lens + nrecvs;
1700   count  = nrecvs; slen = 0;
1701   while (count) {
1702     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1703     /* unpack receives into our local space */
1704     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1705     source[imdex]  = recv_status.MPI_SOURCE;
1706     lens[imdex]    = n;
1707     slen          += n;
1708     count--;
1709   }
1710   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1711 
1712   /* move the data into the send scatter */
1713   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1714   count = 0;
1715   for (i=0; i<nrecvs; i++) {
1716     values = rvalues + i*nmax;
1717     for (j=0; j<lens[i]; j++) {
1718       lrows[count++] = values[j] - base;
1719     }
1720   }
1721   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1722   ierr = PetscFree(lens);CHKERRQ(ierr);
1723   ierr = PetscFree(owner);CHKERRQ(ierr);
1724   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1725 
1726   /* actually zap the local rows */
1727   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1728   PLogObjectParent(A,istmp);
1729 
1730   /*
1731         Zero the required rows. If the "diagonal block" of the matrix
1732      is square and the user wishes to set the diagonal we use seperate
1733      code so that MatSetValues() is not called for each diagonal allocating
1734      new memory, thus calling lots of mallocs and slowing things down.
1735 
1736        Contributed by: Mathew Knepley
1737   */
1738   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1739   ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr);
1740   if (diag && (l->A->M == l->A->N)) {
1741     ierr      = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1742   } else if (diag) {
1743     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1744     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1745       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1746 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1747     }
1748     for (i = 0; i < slen; i++) {
1749       row  = lrows[i] + rstart_bs;
1750       ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1751     }
1752     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1753     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1754   } else {
1755     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1756   }
1757 
1758   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1759   ierr = PetscFree(lrows);CHKERRQ(ierr);
1760 
1761   /* wait on sends */
1762   if (nsends) {
1763     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1764     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1765     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1766   }
1767   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1768   ierr = PetscFree(svalues);CHKERRQ(ierr);
1769 
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 #undef __FUNC__
1774 #define  __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1775 int MatPrintHelp_MPIBAIJ(Mat A)
1776 {
1777   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1778   MPI_Comm    comm = A->comm;
1779   static int  called = 0;
1780   int         ierr;
1781 
1782   PetscFunctionBegin;
1783   if (!a->rank) {
1784     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1785   }
1786   if (called) {PetscFunctionReturn(0);} else called = 1;
1787   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1788   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 #undef __FUNC__
1793 #define  __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1794 int MatSetUnfactored_MPIBAIJ(Mat A)
1795 {
1796   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1797   int         ierr;
1798 
1799   PetscFunctionBegin;
1800   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1805 
1806 #undef __FUNC__
1807 #define  __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
1808 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1809 {
1810   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1811   Mat         a,b,c,d;
1812   PetscTruth  flg;
1813   int         ierr;
1814 
1815   PetscFunctionBegin;
1816   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1817   a = matA->A; b = matA->B;
1818   c = matB->A; d = matB->B;
1819 
1820   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1821   if (flg == PETSC_TRUE) {
1822     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1823   }
1824   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 /* -------------------------------------------------------------------*/
1829 static struct _MatOps MatOps_Values = {
1830   MatSetValues_MPIBAIJ,
1831   MatGetRow_MPIBAIJ,
1832   MatRestoreRow_MPIBAIJ,
1833   MatMult_MPIBAIJ,
1834   MatMultAdd_MPIBAIJ,
1835   MatMultTranspose_MPIBAIJ,
1836   MatMultTransposeAdd_MPIBAIJ,
1837   0,
1838   0,
1839   0,
1840   0,
1841   0,
1842   0,
1843   0,
1844   MatTranspose_MPIBAIJ,
1845   MatGetInfo_MPIBAIJ,
1846   MatEqual_MPIBAIJ,
1847   MatGetDiagonal_MPIBAIJ,
1848   MatDiagonalScale_MPIBAIJ,
1849   MatNorm_MPIBAIJ,
1850   MatAssemblyBegin_MPIBAIJ,
1851   MatAssemblyEnd_MPIBAIJ,
1852   0,
1853   MatSetOption_MPIBAIJ,
1854   MatZeroEntries_MPIBAIJ,
1855   MatZeroRows_MPIBAIJ,
1856   0,
1857   0,
1858   0,
1859   0,
1860   MatGetSize_MPIBAIJ,
1861   MatGetLocalSize_MPIBAIJ,
1862   MatGetOwnershipRange_MPIBAIJ,
1863   0,
1864   0,
1865   0,
1866   0,
1867   MatDuplicate_MPIBAIJ,
1868   0,
1869   0,
1870   0,
1871   0,
1872   0,
1873   MatGetSubMatrices_MPIBAIJ,
1874   MatIncreaseOverlap_MPIBAIJ,
1875   MatGetValues_MPIBAIJ,
1876   0,
1877   MatPrintHelp_MPIBAIJ,
1878   MatScale_MPIBAIJ,
1879   0,
1880   0,
1881   0,
1882   MatGetBlockSize_MPIBAIJ,
1883   0,
1884   0,
1885   0,
1886   0,
1887   0,
1888   0,
1889   MatSetUnfactored_MPIBAIJ,
1890   0,
1891   MatSetValuesBlocked_MPIBAIJ,
1892   0,
1893   0,
1894   0,
1895   MatGetMaps_Petsc};
1896 
1897 
1898 EXTERN_C_BEGIN
1899 #undef __FUNC__
1900 #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
1901 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1902 {
1903   PetscFunctionBegin;
1904   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1905   *iscopy = PETSC_FALSE;
1906   PetscFunctionReturn(0);
1907 }
1908 EXTERN_C_END
1909 
1910 #undef __FUNC__
1911 #define  __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
1912 /*@C
1913    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1914    (block compressed row).  For good matrix assembly performance
1915    the user should preallocate the matrix storage by setting the parameters
1916    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1917    performance can be increased by more than a factor of 50.
1918 
1919    Collective on MPI_Comm
1920 
1921    Input Parameters:
1922 +  comm - MPI communicator
1923 .  bs   - size of blockk
1924 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1925            This value should be the same as the local size used in creating the
1926            y vector for the matrix-vector product y = Ax.
1927 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1928            This value should be the same as the local size used in creating the
1929            x vector for the matrix-vector product y = Ax.
1930 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1931 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1932 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1933            submatrix  (same for all local rows)
1934 .  d_nnz - array containing the number of block nonzeros in the various block rows
1935            of the in diagonal portion of the local (possibly different for each block
1936            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1937 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1938            submatrix (same for all local rows).
1939 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1940            off-diagonal portion of the local submatrix (possibly different for
1941            each block row) or PETSC_NULL.
1942 
1943    Output Parameter:
1944 .  A - the matrix
1945 
1946    Options Database Keys:
1947 .   -mat_no_unroll - uses code that does not unroll the loops in the
1948                      block calculations (much slower)
1949 .   -mat_block_size - size of the blocks to use
1950 .   -mat_mpi - use the parallel matrix data structures even on one processor
1951                (defaults to using SeqBAIJ format on one processor)
1952 
1953    Notes:
1954    The user MUST specify either the local or global matrix dimensions
1955    (possibly both).
1956 
1957    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1958    than it must be used on all processors that share the object for that argument.
1959 
1960    Storage Information:
1961    For a square global matrix we define each processor's diagonal portion
1962    to be its local rows and the corresponding columns (a square submatrix);
1963    each processor's off-diagonal portion encompasses the remainder of the
1964    local matrix (a rectangular submatrix).
1965 
1966    The user can specify preallocated storage for the diagonal part of
1967    the local submatrix with either d_nz or d_nnz (not both).  Set
1968    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1969    memory allocation.  Likewise, specify preallocated storage for the
1970    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1971 
1972    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1973    the figure below we depict these three local rows and all columns (0-11).
1974 
1975 .vb
1976            0 1 2 3 4 5 6 7 8 9 10 11
1977           -------------------
1978    row 3  |  o o o d d d o o o o o o
1979    row 4  |  o o o d d d o o o o o o
1980    row 5  |  o o o d d d o o o o o o
1981           -------------------
1982 .ve
1983 
1984    Thus, any entries in the d locations are stored in the d (diagonal)
1985    submatrix, and any entries in the o locations are stored in the
1986    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1987    stored simply in the MATSEQBAIJ format for compressed row storage.
1988 
1989    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1990    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1991    In general, for PDE problems in which most nonzeros are near the diagonal,
1992    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1993    or you will get TERRIBLE performance; see the users' manual chapter on
1994    matrices.
1995 
1996    Level: intermediate
1997 
1998 .keywords: matrix, block, aij, compressed row, sparse, parallel
1999 
2000 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2001 @*/
2002 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2003 {
2004   Mat          B;
2005   Mat_MPIBAIJ  *b;
2006   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2007   PetscTruth   flag1,flag2,flg;
2008 
2009   PetscFunctionBegin;
2010   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2011 
2012   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2013   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
2014   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
2015   if (d_nnz) {
2016     for (i=0; i<m/bs; i++) {
2017       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2018     }
2019   }
2020   if (o_nnz) {
2021     for (i=0; i<m/bs; i++) {
2022       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2023     }
2024   }
2025 
2026   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2027   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2028   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2029   if (!flag1 && !flag2 && size == 1) {
2030     if (M == PETSC_DECIDE) M = m;
2031     if (N == PETSC_DECIDE) N = n;
2032     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
2033     PetscFunctionReturn(0);
2034   }
2035 
2036   *A = 0;
2037   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
2038   PLogObjectCreate(B);
2039   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2040   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2041   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2042 
2043   B->ops->destroy    = MatDestroy_MPIBAIJ;
2044   B->ops->view       = MatView_MPIBAIJ;
2045   B->mapping    = 0;
2046   B->factor     = 0;
2047   B->assembled  = PETSC_FALSE;
2048 
2049   B->insertmode = NOT_SET_VALUES;
2050   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2051   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2052 
2053   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2054     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2055   }
2056   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2057     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2058   }
2059   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
2060     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2061   }
2062   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2063   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2064 
2065   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2066     work[0] = m; work[1] = n;
2067     mbs = m/bs; nbs = n/bs;
2068     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2069     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2070     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2071   }
2072   if (m == PETSC_DECIDE) {
2073     Mbs = M/bs;
2074     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2075     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2076     m   = mbs*bs;
2077   }
2078   if (n == PETSC_DECIDE) {
2079     Nbs = N/bs;
2080     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
2081     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2082     n   = nbs*bs;
2083   }
2084   if (mbs*bs != m || nbs*bs != n) {
2085     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2086   }
2087 
2088   b->m = m; B->m = m;
2089   b->n = n; B->n = n;
2090   b->N = N; B->N = N;
2091   b->M = M; B->M = M;
2092   b->bs  = bs;
2093   b->bs2 = bs*bs;
2094   b->mbs = mbs;
2095   b->nbs = nbs;
2096   b->Mbs = Mbs;
2097   b->Nbs = Nbs;
2098 
2099   /* the information in the maps duplicates the information computed below, eventually
2100      we should remove the duplicate information that is not contained in the maps */
2101   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2102   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2103 
2104   /* build local table of row and column ownerships */
2105   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2106   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2107   b->cowners    = b->rowners + b->size + 2;
2108   b->rowners_bs = b->cowners + b->size + 2;
2109   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2110   b->rowners[0]    = 0;
2111   for (i=2; i<=b->size; i++) {
2112     b->rowners[i] += b->rowners[i-1];
2113   }
2114   for (i=0; i<=b->size; i++) {
2115     b->rowners_bs[i] = b->rowners[i]*bs;
2116   }
2117   b->rstart    = b->rowners[b->rank];
2118   b->rend      = b->rowners[b->rank+1];
2119   b->rstart_bs = b->rstart * bs;
2120   b->rend_bs   = b->rend * bs;
2121 
2122   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2123   b->cowners[0] = 0;
2124   for (i=2; i<=b->size; i++) {
2125     b->cowners[i] += b->cowners[i-1];
2126   }
2127   b->cstart    = b->cowners[b->rank];
2128   b->cend      = b->cowners[b->rank+1];
2129   b->cstart_bs = b->cstart * bs;
2130   b->cend_bs   = b->cend * bs;
2131 
2132 
2133   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2134   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2135   PLogObjectParent(B,b->A);
2136   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2137   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2138   PLogObjectParent(B,b->B);
2139 
2140   /* build cache for off array entries formed */
2141   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
2142   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
2143   b->donotstash  = PETSC_FALSE;
2144   b->colmap      = PETSC_NULL;
2145   b->garray      = PETSC_NULL;
2146   b->roworiented = PETSC_TRUE;
2147 
2148   /* stuff used in block assembly */
2149   b->barray       = 0;
2150 
2151   /* stuff used for matrix vector multiply */
2152   b->lvec         = 0;
2153   b->Mvctx        = 0;
2154 
2155   /* stuff for MatGetRow() */
2156   b->rowindices   = 0;
2157   b->rowvalues    = 0;
2158   b->getrowactive = PETSC_FALSE;
2159 
2160   /* hash table stuff */
2161   b->ht           = 0;
2162   b->hd           = 0;
2163   b->ht_size      = 0;
2164   b->ht_flag      = PETSC_FALSE;
2165   b->ht_fact      = 0;
2166   b->ht_total_ct  = 0;
2167   b->ht_insert_ct = 0;
2168 
2169   *A = B;
2170   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2171   if (flg) {
2172     double fact = 1.39;
2173     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2174     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2175     if (fact <= 1.0) fact = 1.39;
2176     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2177     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2178   }
2179   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2180                                      "MatStoreValues_MPIBAIJ",
2181                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2182   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2183                                      "MatRetrieveValues_MPIBAIJ",
2184                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2185   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2186                                      "MatGetDiagonalBlock_MPIBAIJ",
2187                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNC__
2192 #define  __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
2193 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2194 {
2195   Mat         mat;
2196   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2197   int         ierr,len=0;
2198   PetscTruth  flg;
2199 
2200   PetscFunctionBegin;
2201   *newmat       = 0;
2202   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2203   PLogObjectCreate(mat);
2204   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2205   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2206   mat->ops->destroy = MatDestroy_MPIBAIJ;
2207   mat->ops->view    = MatView_MPIBAIJ;
2208   mat->factor       = matin->factor;
2209   mat->assembled    = PETSC_TRUE;
2210   mat->insertmode   = NOT_SET_VALUES;
2211 
2212   a->m = mat->m   = oldmat->m;
2213   a->n = mat->n   = oldmat->n;
2214   a->M = mat->M   = oldmat->M;
2215   a->N = mat->N   = oldmat->N;
2216 
2217   a->bs  = oldmat->bs;
2218   a->bs2 = oldmat->bs2;
2219   a->mbs = oldmat->mbs;
2220   a->nbs = oldmat->nbs;
2221   a->Mbs = oldmat->Mbs;
2222   a->Nbs = oldmat->Nbs;
2223 
2224   a->rstart       = oldmat->rstart;
2225   a->rend         = oldmat->rend;
2226   a->cstart       = oldmat->cstart;
2227   a->cend         = oldmat->cend;
2228   a->size         = oldmat->size;
2229   a->rank         = oldmat->rank;
2230   a->donotstash   = oldmat->donotstash;
2231   a->roworiented  = oldmat->roworiented;
2232   a->rowindices   = 0;
2233   a->rowvalues    = 0;
2234   a->getrowactive = PETSC_FALSE;
2235   a->barray       = 0;
2236   a->rstart_bs    = oldmat->rstart_bs;
2237   a->rend_bs      = oldmat->rend_bs;
2238   a->cstart_bs    = oldmat->cstart_bs;
2239   a->cend_bs      = oldmat->cend_bs;
2240 
2241   /* hash table stuff */
2242   a->ht           = 0;
2243   a->hd           = 0;
2244   a->ht_size      = 0;
2245   a->ht_flag      = oldmat->ht_flag;
2246   a->ht_fact      = oldmat->ht_fact;
2247   a->ht_total_ct  = 0;
2248   a->ht_insert_ct = 0;
2249 
2250 
2251   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2252   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2253   a->cowners    = a->rowners + a->size + 2;
2254   a->rowners_bs = a->cowners + a->size + 2;
2255   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2256   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2257   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2258   if (oldmat->colmap) {
2259 #if defined (PETSC_USE_CTABLE)
2260   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2261 #else
2262     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2263     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2264     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2265 #endif
2266   } else a->colmap = 0;
2267   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2268     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2269     PLogObjectMemory(mat,len*sizeof(int));
2270     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2271   } else a->garray = 0;
2272 
2273   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2274   PLogObjectParent(mat,a->lvec);
2275   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2276 
2277   PLogObjectParent(mat,a->Mvctx);
2278   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2279   PLogObjectParent(mat,a->A);
2280   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2281   PLogObjectParent(mat,a->B);
2282   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2283   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2284   if (flg) {
2285     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2286   }
2287   *newmat = mat;
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #include "sys.h"
2292 
2293 #undef __FUNC__
2294 #define  __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
2295 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2296 {
2297   Mat          A;
2298   int          i,nz,ierr,j,rstart,rend,fd;
2299   Scalar       *vals,*buf;
2300   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2301   MPI_Status   status;
2302   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2303   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2304   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2305   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2306   int          dcount,kmax,k,nzcount,tmp;
2307 
2308   PetscFunctionBegin;
2309   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2310 
2311   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2312   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2313   if (!rank) {
2314     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2315     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2316     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2317     if (header[3] < 0) {
2318       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2319     }
2320   }
2321 
2322   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2323   M = header[1]; N = header[2];
2324 
2325   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2326 
2327   /*
2328      This code adds extra rows to make sure the number of rows is
2329      divisible by the blocksize
2330   */
2331   Mbs        = M/bs;
2332   extra_rows = bs - M + bs*(Mbs);
2333   if (extra_rows == bs) extra_rows = 0;
2334   else                  Mbs++;
2335   if (extra_rows &&!rank) {
2336     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2337   }
2338 
2339   /* determine ownership of all rows */
2340   mbs = Mbs/size + ((Mbs % size) > rank);
2341   m   = mbs * bs;
2342   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2343   browners = rowners + size + 1;
2344   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2345   rowners[0] = 0;
2346   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2347   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2348   rstart = rowners[rank];
2349   rend   = rowners[rank+1];
2350 
2351   /* distribute row lengths to all processors */
2352   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2353   if (!rank) {
2354     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2355     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2356     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2357     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2358     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2359     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2360     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2361   } else {
2362     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2363   }
2364 
2365   if (!rank) {
2366     /* calculate the number of nonzeros on each processor */
2367     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2368     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2369     for (i=0; i<size; i++) {
2370       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2371         procsnz[i] += rowlengths[j];
2372       }
2373     }
2374     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2375 
2376     /* determine max buffer needed and allocate it */
2377     maxnz = 0;
2378     for (i=0; i<size; i++) {
2379       maxnz = PetscMax(maxnz,procsnz[i]);
2380     }
2381     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2382 
2383     /* read in my part of the matrix column indices  */
2384     nz = procsnz[0];
2385     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2386     mycols = ibuf;
2387     if (size == 1)  nz -= extra_rows;
2388     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2389     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2390 
2391     /* read in every ones (except the last) and ship off */
2392     for (i=1; i<size-1; i++) {
2393       nz   = procsnz[i];
2394       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2395       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2396     }
2397     /* read in the stuff for the last proc */
2398     if (size != 1) {
2399       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2400       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2401       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2402       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2403     }
2404     ierr = PetscFree(cols);CHKERRQ(ierr);
2405   } else {
2406     /* determine buffer space needed for message */
2407     nz = 0;
2408     for (i=0; i<m; i++) {
2409       nz += locrowlens[i];
2410     }
2411     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2412     mycols = ibuf;
2413     /* receive message of column indices*/
2414     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2415     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2416     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2417   }
2418 
2419   /* loop over local rows, determining number of off diagonal entries */
2420   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2421   odlens = dlens + (rend-rstart);
2422   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2423   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2424   masked1 = mask    + Mbs;
2425   masked2 = masked1 + Mbs;
2426   rowcount = 0; nzcount = 0;
2427   for (i=0; i<mbs; i++) {
2428     dcount  = 0;
2429     odcount = 0;
2430     for (j=0; j<bs; j++) {
2431       kmax = locrowlens[rowcount];
2432       for (k=0; k<kmax; k++) {
2433         tmp = mycols[nzcount++]/bs;
2434         if (!mask[tmp]) {
2435           mask[tmp] = 1;
2436           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2437           else masked1[dcount++] = tmp;
2438         }
2439       }
2440       rowcount++;
2441     }
2442 
2443     dlens[i]  = dcount;
2444     odlens[i] = odcount;
2445 
2446     /* zero out the mask elements we set */
2447     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2448     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2449   }
2450 
2451   /* create our matrix */
2452   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2453   A = *newmat;
2454   MatSetOption(A,MAT_COLUMNS_SORTED);
2455 
2456   if (!rank) {
2457     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2458     /* read in my part of the matrix numerical values  */
2459     nz = procsnz[0];
2460     vals = buf;
2461     mycols = ibuf;
2462     if (size == 1)  nz -= extra_rows;
2463     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2464     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2465 
2466     /* insert into matrix */
2467     jj      = rstart*bs;
2468     for (i=0; i<m; i++) {
2469       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2470       mycols += locrowlens[i];
2471       vals   += locrowlens[i];
2472       jj++;
2473     }
2474     /* read in other processors (except the last one) and ship out */
2475     for (i=1; i<size-1; i++) {
2476       nz   = procsnz[i];
2477       vals = buf;
2478       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2479       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2480     }
2481     /* the last proc */
2482     if (size != 1){
2483       nz   = procsnz[i] - extra_rows;
2484       vals = buf;
2485       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2486       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2487       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2488     }
2489     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2490   } else {
2491     /* receive numeric values */
2492     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2493 
2494     /* receive message of values*/
2495     vals   = buf;
2496     mycols = ibuf;
2497     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2498     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2499     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2500 
2501     /* insert into matrix */
2502     jj      = rstart*bs;
2503     for (i=0; i<m; i++) {
2504       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2505       mycols += locrowlens[i];
2506       vals   += locrowlens[i];
2507       jj++;
2508     }
2509   }
2510   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2511   ierr = PetscFree(buf);CHKERRQ(ierr);
2512   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2513   ierr = PetscFree(rowners);CHKERRQ(ierr);
2514   ierr = PetscFree(dlens);CHKERRQ(ierr);
2515   ierr = PetscFree(mask);CHKERRQ(ierr);
2516   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2517   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNC__
2522 #define  __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2523 /*@
2524    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2525 
2526    Input Parameters:
2527 .  mat  - the matrix
2528 .  fact - factor
2529 
2530    Collective on Mat
2531 
2532    Level: advanced
2533 
2534   Notes:
2535    This can also be set by the command line option: -mat_use_hash_table fact
2536 
2537 .keywords: matrix, hashtable, factor, HT
2538 
2539 .seealso: MatSetOption()
2540 @*/
2541 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2542 {
2543   Mat_MPIBAIJ *baij;
2544 
2545   PetscFunctionBegin;
2546   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2547   if (mat->type != MATMPIBAIJ) {
2548     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2549   }
2550   baij = (Mat_MPIBAIJ*)mat->data;
2551   baij->ht_fact = fact;
2552   PetscFunctionReturn(0);
2553 }
2554