xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 93fea6af079e4055664eecaca9855c2a79060c37)
1 /*$Id: mpibaij.c,v 1.189 2000/04/09 04:36:25 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 extern int MatSetUpMultiply_MPIBAIJ(Mat);
7 extern int DisAssemble_MPIBAIJ(Mat);
8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 extern int MatPrintHelp_SeqBAIJ(Mat);
16 extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22    converts the entries into single precision and then calls ..._MatScalar() to put them
23    into the single precision data structures.
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27 extern int MatSetValuesBlock_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28 extern int MatSetValuesBlock_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 #else
30 #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ
31 #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
33 #endif
34 
35 EXTERN_C_BEGIN
36 #undef __FUNC__
37 #define  __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ"
38 int MatStoreValues_MPIBAIJ(Mat mat)
39 {
40   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
41   int         ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
45   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 EXTERN_C_END
49 
50 EXTERN_C_BEGIN
51 #undef __FUNC__
52 #define  __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ"
53 int MatRetrieveValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
56   int         ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
60   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 /*
66      Local utility routine that creates a mapping from the global column
67    number to the local number in the off-diagonal part of the local
68    storage of the matrix.  This is done in a non scable way since the
69    length of colmap equals the global matrix length.
70 */
71 #undef __FUNC__
72 #define  __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private"
73 static int CreateColmap_MPIBAIJ_Private(Mat mat)
74 {
75   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
76   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
77   int         nbs = B->nbs,i,bs=B->bs,ierr;
78 
79   PetscFunctionBegin;
80 #if defined (PETSC_USE_CTABLE)
81   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
82   for (i=0; i<nbs; i++){
83     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
84   }
85 #else
86   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
87   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
88   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
89   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
90 #endif
91   PetscFunctionReturn(0);
92 }
93 
94 #define CHUNKSIZE  10
95 
96 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
97 { \
98  \
99     brow = row/bs;  \
100     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
101     rmax = aimax[brow]; nrow = ailen[brow]; \
102       bcol = col/bs; \
103       ridx = row % bs; cidx = col % bs; \
104       low = 0; high = nrow; \
105       while (high-low > 3) { \
106         t = (low+high)/2; \
107         if (rp[t] > bcol) high = t; \
108         else              low  = t; \
109       } \
110       for (_i=low; _i<high; _i++) { \
111         if (rp[_i] > bcol) break; \
112         if (rp[_i] == bcol) { \
113           bap  = ap +  bs2*_i + bs*cidx + ridx; \
114           if (addv == ADD_VALUES) *bap += value;  \
115           else                    *bap  = value;  \
116           goto a_noinsert; \
117         } \
118       } \
119       if (a->nonew == 1) goto a_noinsert; \
120       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
121       if (nrow >= rmax) { \
122         /* there is no extra room in row, therefore enlarge */ \
123         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
124         MatScalar *new_a; \
125  \
126         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
127  \
128         /* malloc new storage space */ \
129         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
130         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
131         new_j   = (int*)(new_a + bs2*new_nz); \
132         new_i   = new_j + new_nz; \
133  \
134         /* copy over old data into new slots */ \
135         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
136         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
137         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
138         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
139         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
140         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
142         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
143                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
144         /* free up old matrix storage */ \
145         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
146         if (!a->singlemalloc) { \
147           ierr = PetscFree(a->i);CHKERRQ(ierr); \
148           ierr = PetscFree(a->j);CHKERRQ(ierr);\
149         } \
150         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
151         a->singlemalloc = PETSC_TRUE; \
152  \
153         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
154         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
155         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
156         a->maxnz += bs2*CHUNKSIZE; \
157         a->reallocs++; \
158         a->nz++; \
159       } \
160       N = nrow++ - 1;  \
161       /* shift up all the later entries in this row */ \
162       for (ii=N; ii>=_i; ii--) { \
163         rp[ii+1] = rp[ii]; \
164         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
165       } \
166       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
167       rp[_i]                      = bcol;  \
168       ap[bs2*_i + bs*cidx + ridx] = value;  \
169       a_noinsert:; \
170     ailen[brow] = nrow; \
171 }
172 
173 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
174 { \
175     brow = row/bs;  \
176     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
177     rmax = bimax[brow]; nrow = bilen[brow]; \
178       bcol = col/bs; \
179       ridx = row % bs; cidx = col % bs; \
180       low = 0; high = nrow; \
181       while (high-low > 3) { \
182         t = (low+high)/2; \
183         if (rp[t] > bcol) high = t; \
184         else              low  = t; \
185       } \
186       for (_i=low; _i<high; _i++) { \
187         if (rp[_i] > bcol) break; \
188         if (rp[_i] == bcol) { \
189           bap  = ap +  bs2*_i + bs*cidx + ridx; \
190           if (addv == ADD_VALUES) *bap += value;  \
191           else                    *bap  = value;  \
192           goto b_noinsert; \
193         } \
194       } \
195       if (b->nonew == 1) goto b_noinsert; \
196       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
197       if (nrow >= rmax) { \
198         /* there is no extra room in row, therefore enlarge */ \
199         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
200         MatScalar *new_a; \
201  \
202         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
203  \
204         /* malloc new storage space */ \
205         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
206         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
207         new_j   = (int*)(new_a + bs2*new_nz); \
208         new_i   = new_j + new_nz; \
209  \
210         /* copy over old data into new slots */ \
211         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
212         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
213         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
214         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
215         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
216         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
217         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
218         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
219                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
220         /* free up old matrix storage */ \
221         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
222         if (!b->singlemalloc) { \
223           ierr = PetscFree(b->i);CHKERRQ(ierr); \
224           ierr = PetscFree(b->j);CHKERRQ(ierr); \
225         } \
226         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
227         b->singlemalloc = PETSC_TRUE; \
228  \
229         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
230         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
231         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
232         b->maxnz += bs2*CHUNKSIZE; \
233         b->reallocs++; \
234         b->nz++; \
235       } \
236       N = nrow++ - 1;  \
237       /* shift up all the later entries in this row */ \
238       for (ii=N; ii>=_i; ii--) { \
239         rp[ii+1] = rp[ii]; \
240         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
241       } \
242       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
243       rp[_i]                      = bcol;  \
244       ap[bs2*_i + bs*cidx + ridx] = value;  \
245       b_noinsert:; \
246     bilen[brow] = nrow; \
247 }
248 
249 #if defined(PETSC_USE_MAT_SINGLE)
250 #undef __FUNC__
251 #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
252 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
253 {
254   int       ierr,i,N = m*n;
255   MatScalar *vsingle = (MatScalar*)v;
256 
257   PetscFunctionBegin;
258   for (i=0; i<N; i++) {
259     vsingle[i] = v[i];
260   }
261   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
262   PetscFunctionReturn(0);
263 }
264 
265 #undef __FUNC__
266 #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
267 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
268 {
269   int       ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2;
270   MatScalar *vsingle = (MatScalar*)v;
271 
272   PetscFunctionBegin;
273   for (i=0; i<N; i++) {
274     vsingle[i] = v[i];
275   }
276   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277   PetscFunctionReturn(0);
278 }
279 #endif
280 
281 #undef __FUNC__
282 #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
283 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
284 {
285   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
286   MatScalar   value;
287   int         ierr,i,j,row,col;
288   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
289   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
290   int         cend_orig=baij->cend_bs,bs=baij->bs;
291 
292   /* Some Variables required in the macro */
293   Mat         A = baij->A;
294   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
295   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
296   MatScalar   *aa=a->a;
297 
298   Mat         B = baij->B;
299   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
300   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
301   MatScalar   *ba=b->a;
302 
303   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
304   int         low,high,t,ridx,cidx,bs2=a->bs2;
305   MatScalar   *ap,*bap;
306 
307   PetscFunctionBegin;
308   for (i=0; i<m; i++) {
309     if (im[i] < 0) continue;
310 #if defined(PETSC_USE_BOPT_g)
311     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
312 #endif
313     if (im[i] >= rstart_orig && im[i] < rend_orig) {
314       row = im[i] - rstart_orig;
315       for (j=0; j<n; j++) {
316         if (in[j] >= cstart_orig && in[j] < cend_orig){
317           col = in[j] - cstart_orig;
318           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
319           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
320           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
321         } else if (in[j] < 0) continue;
322 #if defined(PETSC_USE_BOPT_g)
323         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
324 #endif
325         else {
326           if (mat->was_assembled) {
327             if (!baij->colmap) {
328               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
329             }
330 #if defined (PETSC_USE_CTABLE)
331             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
332             col  = col - 1 + in[j]%bs;
333 #else
334             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
335 #endif
336             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
337               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
338               col =  in[j];
339               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
340               B = baij->B;
341               b = (Mat_SeqBAIJ*)(B)->data;
342               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
343               ba=b->a;
344             }
345           } else col = in[j];
346           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
347           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
348           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
349         }
350       }
351     } else {
352       if (!baij->donotstash) {
353         if (roworiented) {
354           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
355         } else {
356           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
357         }
358       }
359     }
360   }
361   PetscFunctionReturn(0);
362 }
363 
364 #undef __FUNC__
365 #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
366 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
367 {
368   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
369   MatScalar   *value,*barray=baij->barray;
370   int         ierr,i,j,ii,jj,row,col;
371   int         roworiented = baij->roworiented,rstart=baij->rstart ;
372   int         rend=baij->rend,cstart=baij->cstart,stepval;
373   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
374 
375   PetscFunctionBegin;
376   if(!barray) {
377     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
378   }
379 
380   if (roworiented) {
381     stepval = (n-1)*bs;
382   } else {
383     stepval = (m-1)*bs;
384   }
385   for (i=0; i<m; i++) {
386     if (im[i] < 0) continue;
387 #if defined(PETSC_USE_BOPT_g)
388     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
389 #endif
390     if (im[i] >= rstart && im[i] < rend) {
391       row = im[i] - rstart;
392       for (j=0; j<n; j++) {
393         /* If NumCol = 1 then a copy is not required */
394         if ((roworiented) && (n == 1)) {
395           barray = v + i*bs2;
396         } else if((!roworiented) && (m == 1)) {
397           barray = v + j*bs2;
398         } else { /* Here a copy is required */
399           if (roworiented) {
400             value = v + i*(stepval+bs)*bs + j*bs;
401           } else {
402             value = v + j*(stepval+bs)*bs + i*bs;
403           }
404           for (ii=0; ii<bs; ii++,value+=stepval) {
405             for (jj=0; jj<bs; jj++) {
406               *barray++  = *value++;
407             }
408           }
409           barray -=bs2;
410         }
411 
412         if (in[j] >= cstart && in[j] < cend){
413           col  = in[j] - cstart;
414           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
415         }
416         else if (in[j] < 0) continue;
417 #if defined(PETSC_USE_BOPT_g)
418         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
419 #endif
420         else {
421           if (mat->was_assembled) {
422             if (!baij->colmap) {
423               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
424             }
425 
426 #if defined(PETSC_USE_BOPT_g)
427 #if defined (PETSC_USE_CTABLE)
428             { int data;
429               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
430               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
431             }
432 #else
433             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
434 #endif
435 #endif
436 #if defined (PETSC_USE_CTABLE)
437 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
438             col  = (col - 1)/bs;
439 #else
440             col = (baij->colmap[in[j]] - 1)/bs;
441 #endif
442             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
443               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
444               col =  in[j];
445             }
446           }
447           else col = in[j];
448           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
449         }
450       }
451     } else {
452       if (!baij->donotstash) {
453         if (roworiented) {
454           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
455         } else {
456           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
457         }
458       }
459     }
460   }
461   PetscFunctionReturn(0);
462 }
463 #define HASH_KEY 0.6180339887
464 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
465 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
466 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
467 #undef __FUNC__
468 #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT"
469 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
470 {
471   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
472   int         ierr,i,j,row,col;
473   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
474   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
475   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
476   PetscReal   tmp;
477   MatScalar   ** HD = baij->hd,value;
478 #if defined(PETSC_USE_BOPT_g)
479   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
480 #endif
481 
482   PetscFunctionBegin;
483 
484   for (i=0; i<m; i++) {
485 #if defined(PETSC_USE_BOPT_g)
486     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
487     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
488 #endif
489       row = im[i];
490     if (row >= rstart_orig && row < rend_orig) {
491       for (j=0; j<n; j++) {
492         col = in[j];
493         if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m];
494         /* Look up into the Hash Table */
495         key = (row/bs)*Nbs+(col/bs)+1;
496         h1  = HASH(size,key,tmp);
497 
498 
499         idx = h1;
500 #if defined(PETSC_USE_BOPT_g)
501         insert_ct++;
502         total_ct++;
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
507             if (idx == h1) {
508               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
509             }
510           }
511         }
512 #else
513         if (HT[idx] != key) {
514           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
515           if (idx == size) {
516             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
517             if (idx == h1) {
518               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
519             }
520           }
521         }
522 #endif
523         /* A HASH table entry is found, so insert the values at the correct address */
524         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
525         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
526       }
527     } else {
528       if (!baij->donotstash) {
529         if (roworiented) {
530           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
531         } else {
532           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
533         }
534       }
535     }
536   }
537 #if defined(PETSC_USE_BOPT_g)
538   baij->ht_total_ct = total_ct;
539   baij->ht_insert_ct = insert_ct;
540 #endif
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNC__
545 #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
546 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
547 {
548   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
549   int         ierr,i,j,ii,jj,row,col;
550   int         roworiented = baij->roworiented,rstart=baij->rstart ;
551   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
552   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
553   PetscReal   tmp;
554   MatScalar   **HD = baij->hd,*baij_a;
555   Scalar      *v_t,*value;
556 #if defined(PETSC_USE_BOPT_g)
557   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
558 #endif
559 
560   PetscFunctionBegin;
561 
562   if (roworiented) {
563     stepval = (n-1)*bs;
564   } else {
565     stepval = (m-1)*bs;
566   }
567   for (i=0; i<m; i++) {
568 #if defined(PETSC_USE_BOPT_g)
569     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
570     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
571 #endif
572     row   = im[i];
573     v_t   = v + i*bs2;
574     if (row >= rstart && row < rend) {
575       for (j=0; j<n; j++) {
576         col = in[j];
577 
578         /* Look up into the Hash Table */
579         key = row*Nbs+col+1;
580         h1  = HASH(size,key,tmp);
581 
582         idx = h1;
583 #if defined(PETSC_USE_BOPT_g)
584         total_ct++;
585         insert_ct++;
586        if (HT[idx] != key) {
587           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
588           if (idx == size) {
589             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
590             if (idx == h1) {
591               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
592             }
593           }
594         }
595 #else
596         if (HT[idx] != key) {
597           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
598           if (idx == size) {
599             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
600             if (idx == h1) {
601               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
602             }
603           }
604         }
605 #endif
606         baij_a = HD[idx];
607         if (roworiented) {
608           /*value = v + i*(stepval+bs)*bs + j*bs;*/
609           /* value = v + (i*(stepval+bs)+j)*bs; */
610           value = v_t;
611           v_t  += bs;
612           if (addv == ADD_VALUES) {
613             for (ii=0; ii<bs; ii++,value+=stepval) {
614               for (jj=ii; jj<bs2; jj+=bs) {
615                 baij_a[jj]  += *value++;
616               }
617             }
618           } else {
619             for (ii=0; ii<bs; ii++,value+=stepval) {
620               for (jj=ii; jj<bs2; jj+=bs) {
621                 baij_a[jj]  = *value++;
622               }
623             }
624           }
625         } else {
626           value = v + j*(stepval+bs)*bs + i*bs;
627           if (addv == ADD_VALUES) {
628             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
629               for (jj=0; jj<bs; jj++) {
630                 baij_a[jj]  += *value++;
631               }
632             }
633           } else {
634             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
635               for (jj=0; jj<bs; jj++) {
636                 baij_a[jj]  = *value++;
637               }
638             }
639           }
640         }
641       }
642     } else {
643       if (!baij->donotstash) {
644         if (roworiented) {
645           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
646         } else {
647           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
648         }
649       }
650     }
651   }
652 #if defined(PETSC_USE_BOPT_g)
653   baij->ht_total_ct = total_ct;
654   baij->ht_insert_ct = insert_ct;
655 #endif
656   PetscFunctionReturn(0);
657 }
658 
659 #undef __FUNC__
660 #define  __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
661 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
662 {
663   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
664   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
665   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
666 
667   PetscFunctionBegin;
668   for (i=0; i<m; i++) {
669     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
670     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
671     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
672       row = idxm[i] - bsrstart;
673       for (j=0; j<n; j++) {
674         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
675         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
676         if (idxn[j] >= bscstart && idxn[j] < bscend){
677           col = idxn[j] - bscstart;
678           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
679         } else {
680           if (!baij->colmap) {
681             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
682           }
683 #if defined (PETSC_USE_CTABLE)
684           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
685           data --;
686 #else
687           data = baij->colmap[idxn[j]/bs]-1;
688 #endif
689           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
690           else {
691             col  = data + idxn[j]%bs;
692             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
693           }
694         }
695       }
696     } else {
697       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
698     }
699   }
700  PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNC__
704 #define  __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
705 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
706 {
707   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
708   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
709   int        ierr,i,bs2=baij->bs2;
710   PetscReal  sum = 0.0;
711   MatScalar  *v;
712 
713   PetscFunctionBegin;
714   if (baij->size == 1) {
715     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
716   } else {
717     if (type == NORM_FROBENIUS) {
718       v = amat->a;
719       for (i=0; i<amat->nz*bs2; i++) {
720 #if defined(PETSC_USE_COMPLEX)
721         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
722 #else
723         sum += (*v)*(*v); v++;
724 #endif
725       }
726       v = bmat->a;
727       for (i=0; i<bmat->nz*bs2; i++) {
728 #if defined(PETSC_USE_COMPLEX)
729         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
730 #else
731         sum += (*v)*(*v); v++;
732 #endif
733       }
734       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
735       *norm = sqrt(*norm);
736     } else {
737       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
738     }
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 
744 /*
745   Creates the hash table, and sets the table
746   This table is created only once.
747   If new entried need to be added to the matrix
748   then the hash table has to be destroyed and
749   recreated.
750 */
751 #undef __FUNC__
752 #define  __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
753 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
754 {
755   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
756   Mat         A = baij->A,B=baij->B;
757   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
758   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
759   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
760   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
761   int         *HT,key;
762   MatScalar   **HD;
763   PetscReal   tmp;
764 #if defined(PETSC_USE_BOPT_g)
765   int         ct=0,max=0;
766 #endif
767 
768   PetscFunctionBegin;
769   baij->ht_size=(int)(factor*nz);
770   size = baij->ht_size;
771 
772   if (baij->ht) {
773     PetscFunctionReturn(0);
774   }
775 
776   /* Allocate Memory for Hash Table */
777   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
778   baij->ht = (int*)(baij->hd + size);
779   HD = baij->hd;
780   HT = baij->ht;
781 
782 
783   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
784 
785 
786   /* Loop Over A */
787   for (i=0; i<a->mbs; i++) {
788     for (j=ai[i]; j<ai[i+1]; j++) {
789       row = i+rstart;
790       col = aj[j]+cstart;
791 
792       key = row*Nbs + col + 1;
793       h1  = HASH(size,key,tmp);
794       for (k=0; k<size; k++){
795         if (HT[(h1+k)%size] == 0.0) {
796           HT[(h1+k)%size] = key;
797           HD[(h1+k)%size] = a->a + j*bs2;
798           break;
799 #if defined(PETSC_USE_BOPT_g)
800         } else {
801           ct++;
802 #endif
803         }
804       }
805 #if defined(PETSC_USE_BOPT_g)
806       if (k> max) max = k;
807 #endif
808     }
809   }
810   /* Loop Over B */
811   for (i=0; i<b->mbs; i++) {
812     for (j=bi[i]; j<bi[i+1]; j++) {
813       row = i+rstart;
814       col = garray[bj[j]];
815       key = row*Nbs + col + 1;
816       h1  = HASH(size,key,tmp);
817       for (k=0; k<size; k++){
818         if (HT[(h1+k)%size] == 0.0) {
819           HT[(h1+k)%size] = key;
820           HD[(h1+k)%size] = b->a + j*bs2;
821           break;
822 #if defined(PETSC_USE_BOPT_g)
823         } else {
824           ct++;
825 #endif
826         }
827       }
828 #if defined(PETSC_USE_BOPT_g)
829       if (k> max) max = k;
830 #endif
831     }
832   }
833 
834   /* Print Summary */
835 #if defined(PETSC_USE_BOPT_g)
836   for (i=0,j=0; i<size; i++) {
837     if (HT[i]) {j++;}
838   }
839   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
840 #endif
841   PetscFunctionReturn(0);
842 }
843 
844 #undef __FUNC__
845 #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
846 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
847 {
848   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
849   int         ierr,nstash,reallocs;
850   InsertMode  addv;
851 
852   PetscFunctionBegin;
853   if (baij->donotstash) {
854     PetscFunctionReturn(0);
855   }
856 
857   /* make sure all processors are either in INSERTMODE or ADDMODE */
858   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
859   if (addv == (ADD_VALUES|INSERT_VALUES)) {
860     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
861   }
862   mat->insertmode = addv; /* in case this processor had no cache */
863 
864   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
865   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
866   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
867   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
868   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
869   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNC__
874 #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
875 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
876 {
877   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
878   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
879   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
880   int         *row,*col,other_disassembled;
881   PetscTruth  r1,r2,r3;
882   MatScalar   *val;
883   InsertMode  addv = mat->insertmode;
884 
885   PetscFunctionBegin;
886   if (!baij->donotstash) {
887     while (1) {
888       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
889       if (!flg) break;
890 
891       for (i=0; i<n;) {
892         /* Now identify the consecutive vals belonging to the same row */
893         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
894         if (j < n) ncols = j-i;
895         else       ncols = n-i;
896         /* Now assemble all these values with a single function call */
897         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
898         i = j;
899       }
900     }
901     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
902     /* Now process the block-stash. Since the values are stashed column-oriented,
903        set the roworiented flag to column oriented, and after MatSetValues()
904        restore the original flags */
905     r1 = baij->roworiented;
906     r2 = a->roworiented;
907     r3 = b->roworiented;
908     baij->roworiented = PETSC_FALSE;
909     a->roworiented    = PETSC_FALSE;
910     b->roworiented    = PETSC_FALSE;
911     while (1) {
912       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
913       if (!flg) break;
914 
915       for (i=0; i<n;) {
916         /* Now identify the consecutive vals belonging to the same row */
917         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
918         if (j < n) ncols = j-i;
919         else       ncols = n-i;
920         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
921         i = j;
922       }
923     }
924     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
925     baij->roworiented = r1;
926     a->roworiented    = r2;
927     b->roworiented    = r3;
928   }
929 
930   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
932 
933   /* determine if any processor has disassembled, if so we must
934      also disassemble ourselfs, in order that we may reassemble. */
935   /*
936      if nonzero structure of submatrix B cannot change then we know that
937      no processor disassembled thus we can skip this stuff
938   */
939   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
940     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
941     if (mat->was_assembled && !other_disassembled) {
942       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
943     }
944   }
945 
946   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
947     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
948   }
949   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
950   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
951 
952 #if defined(PETSC_USE_BOPT_g)
953   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
954     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
955     baij->ht_total_ct  = 0;
956     baij->ht_insert_ct = 0;
957   }
958 #endif
959   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
960     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
961     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
962     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
963   }
964 
965   if (baij->rowvalues) {
966     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
967     baij->rowvalues = 0;
968   }
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNC__
973 #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_Binary"
974 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
975 {
976   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
977   int          ierr;
978 
979   PetscFunctionBegin;
980   if (baij->size == 1) {
981     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
982   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNC__
987 #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
988 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
989 {
990   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
991   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
992   PetscTruth   isascii,isdraw;
993 
994   PetscFunctionBegin;
995   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
996   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
997   if (isascii) {
998     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
999     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1000       MatInfo info;
1001       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1002       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1003       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1004               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1005               baij->bs,(int)info.memory);CHKERRQ(ierr);
1006       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1007       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1008       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1009       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1010       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
1011       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1012       PetscFunctionReturn(0);
1013     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1014       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1015       PetscFunctionReturn(0);
1016     }
1017   }
1018 
1019   if (isdraw) {
1020     Draw       draw;
1021     PetscTruth isnull;
1022     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1023     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1024   }
1025 
1026   if (size == 1) {
1027     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1028   } else {
1029     /* assemble the entire matrix onto first processor. */
1030     Mat         A;
1031     Mat_SeqBAIJ *Aloc;
1032     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1033     MatScalar   *a;
1034 
1035     if (!rank) {
1036       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1037     } else {
1038       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1039     }
1040     PLogObjectParent(mat,A);
1041 
1042     /* copy over the A part */
1043     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
1044     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1045     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1046 
1047     for (i=0; i<mbs; i++) {
1048       rvals[0] = bs*(baij->rstart + i);
1049       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1050       for (j=ai[i]; j<ai[i+1]; j++) {
1051         col = (baij->cstart+aj[j])*bs;
1052         for (k=0; k<bs; k++) {
1053           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1054           col++; a += bs;
1055         }
1056       }
1057     }
1058     /* copy over the B part */
1059     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1060     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1061     for (i=0; i<mbs; i++) {
1062       rvals[0] = bs*(baij->rstart + i);
1063       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1064       for (j=ai[i]; j<ai[i+1]; j++) {
1065         col = baij->garray[aj[j]]*bs;
1066         for (k=0; k<bs; k++) {
1067           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1068           col++; a += bs;
1069         }
1070       }
1071     }
1072     ierr = PetscFree(rvals);CHKERRQ(ierr);
1073     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1074     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1075     /*
1076        Everyone has to call to draw the matrix since the graphics waits are
1077        synchronized across all processors that share the Draw object
1078     */
1079     if (!rank) {
1080       Viewer sviewer;
1081       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1082       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1083       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1084     }
1085     ierr = MatDestroy(A);CHKERRQ(ierr);
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 
1091 
1092 #undef __FUNC__
1093 #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1094 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1095 {
1096   int        ierr;
1097   PetscTruth isascii,isdraw,issocket,isbinary;
1098 
1099   PetscFunctionBegin;
1100   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1101   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1102   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1103   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1104   if (isascii || isdraw || issocket || isbinary) {
1105     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1106   } else {
1107     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1108   }
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNC__
1113 #define  __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
1114 int MatDestroy_MPIBAIJ(Mat mat)
1115 {
1116   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1117   int         ierr;
1118 
1119   PetscFunctionBegin;
1120 
1121   if (mat->mapping) {
1122     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1123   }
1124   if (mat->bmapping) {
1125     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1126   }
1127   if (mat->rmap) {
1128     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1129   }
1130   if (mat->cmap) {
1131     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1132   }
1133 #if defined(PETSC_USE_LOG)
1134   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1135 #endif
1136 
1137   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1138   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1139 
1140   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1141   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1142   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1143 #if defined (PETSC_USE_CTABLE)
1144   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1145 #else
1146   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1147 #endif
1148   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1149   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1150   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1151   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1152   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1153   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1154   ierr = PetscFree(baij);CHKERRQ(ierr);
1155   PLogObjectDestroy(mat);
1156   PetscHeaderDestroy(mat);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNC__
1161 #define  __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1162 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1163 {
1164   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1165   int         ierr,nt;
1166 
1167   PetscFunctionBegin;
1168   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1169   if (nt != a->n) {
1170     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1171   }
1172   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1173   if (nt != a->m) {
1174     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1175   }
1176   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1177   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1178   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1179   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1180   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNC__
1185 #define  __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1186 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1187 {
1188   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1189   int        ierr;
1190 
1191   PetscFunctionBegin;
1192   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1193   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1194   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1195   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNC__
1200 #define  __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
1201 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1202 {
1203   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1204   int         ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1213   /* receive remote parts: note this assumes the values are not actually */
1214   /* inserted in yy until the next line, which is true for my implementation*/
1215   /* but is not perhaps always true. */
1216   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 #undef __FUNC__
1221 #define  __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
1222 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1223 {
1224   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1225   int         ierr;
1226 
1227   PetscFunctionBegin;
1228   /* do nondiagonal part */
1229   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1230   /* send it on its way */
1231   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1232   /* do local part */
1233   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1234   /* receive remote parts: note this assumes the values are not actually */
1235   /* inserted in yy until the next line, which is true for my implementation*/
1236   /* but is not perhaps always true. */
1237   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 /*
1242   This only works correctly for square matrices where the subblock A->A is the
1243    diagonal block
1244 */
1245 #undef __FUNC__
1246 #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1247 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1248 {
1249   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1250   int         ierr;
1251 
1252   PetscFunctionBegin;
1253   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNC__
1259 #define  __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1260 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1261 {
1262   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1263   int         ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1267   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNC__
1272 #define  __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ"
1273 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1274 {
1275   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1276 
1277   PetscFunctionBegin;
1278   if (m) *m = mat->M;
1279   if (n) *n = mat->N;
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNC__
1284 #define  __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
1285 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1286 {
1287   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1288 
1289   PetscFunctionBegin;
1290   *m = mat->m; *n = mat->n;
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNC__
1295 #define  __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1296 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1297 {
1298   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1299 
1300   PetscFunctionBegin;
1301   if (m) *m = mat->rstart*mat->bs;
1302   if (n) *n = mat->rend*mat->bs;
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNC__
1307 #define  __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
1308 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1309 {
1310   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1311   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1312   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1313   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1314   int        *cmap,*idx_p,cstart = mat->cstart;
1315 
1316   PetscFunctionBegin;
1317   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1318   mat->getrowactive = PETSC_TRUE;
1319 
1320   if (!mat->rowvalues && (idx || v)) {
1321     /*
1322         allocate enough space to hold information from the longest row.
1323     */
1324     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1325     int     max = 1,mbs = mat->mbs,tmp;
1326     for (i=0; i<mbs; i++) {
1327       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1328       if (max < tmp) { max = tmp; }
1329     }
1330     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1331     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1332   }
1333 
1334   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1335   lrow = row - brstart;
1336 
1337   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1338   if (!v)   {pvA = 0; pvB = 0;}
1339   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1340   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1341   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1342   nztot = nzA + nzB;
1343 
1344   cmap  = mat->garray;
1345   if (v  || idx) {
1346     if (nztot) {
1347       /* Sort by increasing column numbers, assuming A and B already sorted */
1348       int imark = -1;
1349       if (v) {
1350         *v = v_p = mat->rowvalues;
1351         for (i=0; i<nzB; i++) {
1352           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1353           else break;
1354         }
1355         imark = i;
1356         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1357         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1358       }
1359       if (idx) {
1360         *idx = idx_p = mat->rowindices;
1361         if (imark > -1) {
1362           for (i=0; i<imark; i++) {
1363             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1364           }
1365         } else {
1366           for (i=0; i<nzB; i++) {
1367             if (cmap[cworkB[i]/bs] < cstart)
1368               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1369             else break;
1370           }
1371           imark = i;
1372         }
1373         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1374         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1375       }
1376     } else {
1377       if (idx) *idx = 0;
1378       if (v)   *v   = 0;
1379     }
1380   }
1381   *nz = nztot;
1382   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1383   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNC__
1388 #define  __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1389 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1390 {
1391   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1392 
1393   PetscFunctionBegin;
1394   if (baij->getrowactive == PETSC_FALSE) {
1395     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1396   }
1397   baij->getrowactive = PETSC_FALSE;
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNC__
1402 #define  __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1403 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1404 {
1405   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1406 
1407   PetscFunctionBegin;
1408   *bs = baij->bs;
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNC__
1413 #define  __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1414 int MatZeroEntries_MPIBAIJ(Mat A)
1415 {
1416   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1417   int         ierr;
1418 
1419   PetscFunctionBegin;
1420   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1421   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNC__
1426 #define  __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1427 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1428 {
1429   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1430   Mat         A = a->A,B = a->B;
1431   int         ierr;
1432   PetscReal   isend[5],irecv[5];
1433 
1434   PetscFunctionBegin;
1435   info->block_size     = (double)a->bs;
1436   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1437   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1438   isend[3] = info->memory;  isend[4] = info->mallocs;
1439   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1440   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1441   isend[3] += info->memory;  isend[4] += info->mallocs;
1442   if (flag == MAT_LOCAL) {
1443     info->nz_used      = isend[0];
1444     info->nz_allocated = isend[1];
1445     info->nz_unneeded  = isend[2];
1446     info->memory       = isend[3];
1447     info->mallocs      = isend[4];
1448   } else if (flag == MAT_GLOBAL_MAX) {
1449     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1450     info->nz_used      = irecv[0];
1451     info->nz_allocated = irecv[1];
1452     info->nz_unneeded  = irecv[2];
1453     info->memory       = irecv[3];
1454     info->mallocs      = irecv[4];
1455   } else if (flag == MAT_GLOBAL_SUM) {
1456     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1457     info->nz_used      = irecv[0];
1458     info->nz_allocated = irecv[1];
1459     info->nz_unneeded  = irecv[2];
1460     info->memory       = irecv[3];
1461     info->mallocs      = irecv[4];
1462   } else {
1463     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1464   }
1465   info->rows_global       = (double)a->M;
1466   info->columns_global    = (double)a->N;
1467   info->rows_local        = (double)a->m;
1468   info->columns_local     = (double)a->N;
1469   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1470   info->fill_ratio_needed = 0;
1471   info->factor_mallocs    = 0;
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNC__
1476 #define  __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1477 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1478 {
1479   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1480   int         ierr;
1481 
1482   PetscFunctionBegin;
1483   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1484       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1485       op == MAT_COLUMNS_UNSORTED ||
1486       op == MAT_COLUMNS_SORTED ||
1487       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1488       op == MAT_KEEP_ZEROED_ROWS ||
1489       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1490         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1491         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1492   } else if (op == MAT_ROW_ORIENTED) {
1493         a->roworiented = PETSC_TRUE;
1494         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1495         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1496   } else if (op == MAT_ROWS_SORTED ||
1497              op == MAT_ROWS_UNSORTED ||
1498              op == MAT_SYMMETRIC ||
1499              op == MAT_STRUCTURALLY_SYMMETRIC ||
1500              op == MAT_YES_NEW_DIAGONALS ||
1501              op == MAT_USE_HASH_TABLE) {
1502     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1503   } else if (op == MAT_COLUMN_ORIENTED) {
1504     a->roworiented = PETSC_FALSE;
1505     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1506     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1507   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1508     a->donotstash = PETSC_TRUE;
1509   } else if (op == MAT_NO_NEW_DIAGONALS) {
1510     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1511   } else if (op == MAT_USE_HASH_TABLE) {
1512     a->ht_flag = PETSC_TRUE;
1513   } else {
1514     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1515   }
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNC__
1520 #define  __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1521 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1524   Mat_SeqBAIJ *Aloc;
1525   Mat         B;
1526   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1527   int         bs=baij->bs,mbs=baij->mbs;
1528   MatScalar   *a;
1529 
1530   PetscFunctionBegin;
1531   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1532   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1533 
1534   /* copy over the A part */
1535   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1536   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1537   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1538 
1539   for (i=0; i<mbs; i++) {
1540     rvals[0] = bs*(baij->rstart + i);
1541     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1542     for (j=ai[i]; j<ai[i+1]; j++) {
1543       col = (baij->cstart+aj[j])*bs;
1544       for (k=0; k<bs; k++) {
1545         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1546         col++; a += bs;
1547       }
1548     }
1549   }
1550   /* copy over the B part */
1551   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1552   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1553   for (i=0; i<mbs; i++) {
1554     rvals[0] = bs*(baij->rstart + i);
1555     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1556     for (j=ai[i]; j<ai[i+1]; j++) {
1557       col = baij->garray[aj[j]]*bs;
1558       for (k=0; k<bs; k++) {
1559         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1560         col++; a += bs;
1561       }
1562     }
1563   }
1564   ierr = PetscFree(rvals);CHKERRQ(ierr);
1565   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567 
1568   if (matout) {
1569     *matout = B;
1570   } else {
1571     PetscOps *Abops;
1572     MatOps   Aops;
1573 
1574     /* This isn't really an in-place transpose .... but free data structures from baij */
1575     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1576     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1577     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1578 #if defined (PETSC_USE_CTABLE)
1579     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1580 #else
1581     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1582 #endif
1583     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1584     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1585     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1586     ierr = PetscFree(baij);CHKERRQ(ierr);
1587 
1588     /*
1589        This is horrible, horrible code. We need to keep the
1590       A pointers for the bops and ops but copy everything
1591       else from C.
1592     */
1593     Abops   = A->bops;
1594     Aops    = A->ops;
1595     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1596     A->bops = Abops;
1597     A->ops  = Aops;
1598 
1599     PetscHeaderDestroy(B);
1600   }
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 #undef __FUNC__
1605 #define  __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
1606 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1607 {
1608   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1609   Mat         a = baij->A,b = baij->B;
1610   int         ierr,s1,s2,s3;
1611 
1612   PetscFunctionBegin;
1613   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1614   if (rr) {
1615     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1616     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1617     /* Overlap communication with computation. */
1618     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1619   }
1620   if (ll) {
1621     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1622     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1623     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1624   }
1625   /* scale  the diagonal block */
1626   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1627 
1628   if (rr) {
1629     /* Do a scatter end and then right scale the off-diagonal block */
1630     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1631     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1632   }
1633 
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNC__
1638 #define  __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
1639 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1640 {
1641   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1642   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1643   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1644   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1645   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1646   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1647   MPI_Comm       comm = A->comm;
1648   MPI_Request    *send_waits,*recv_waits;
1649   MPI_Status     recv_status,*send_status;
1650   IS             istmp;
1651 
1652   PetscFunctionBegin;
1653   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1654   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1655 
1656   /*  first count number of contributors to each processor */
1657   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1658   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1659   procs  = nprocs + size;
1660   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1661   for (i=0; i<N; i++) {
1662     idx   = rows[i];
1663     found = 0;
1664     for (j=0; j<size; j++) {
1665       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1666         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1667       }
1668     }
1669     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1670   }
1671   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1672 
1673   /* inform other processors of number of messages and max length*/
1674   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1675   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1676   nmax   = work[rank];
1677   nrecvs = work[size+rank];
1678   ierr = PetscFree(work);CHKERRQ(ierr);
1679 
1680   /* post receives:   */
1681   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1682   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1683   for (i=0; i<nrecvs; i++) {
1684     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1685   }
1686 
1687   /* do sends:
1688      1) starts[i] gives the starting index in svalues for stuff going to
1689      the ith processor
1690   */
1691   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1692   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1693   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1694   starts[0]  = 0;
1695   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1696   for (i=0; i<N; i++) {
1697     svalues[starts[owner[i]]++] = rows[i];
1698   }
1699   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1700 
1701   starts[0] = 0;
1702   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1703   count = 0;
1704   for (i=0; i<size; i++) {
1705     if (procs[i]) {
1706       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1707     }
1708   }
1709   ierr = PetscFree(starts);CHKERRQ(ierr);
1710 
1711   base = owners[rank]*bs;
1712 
1713   /*  wait on receives */
1714   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1715   source = lens + nrecvs;
1716   count  = nrecvs; slen = 0;
1717   while (count) {
1718     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1719     /* unpack receives into our local space */
1720     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1721     source[imdex]  = recv_status.MPI_SOURCE;
1722     lens[imdex]    = n;
1723     slen          += n;
1724     count--;
1725   }
1726   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1727 
1728   /* move the data into the send scatter */
1729   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1730   count = 0;
1731   for (i=0; i<nrecvs; i++) {
1732     values = rvalues + i*nmax;
1733     for (j=0; j<lens[i]; j++) {
1734       lrows[count++] = values[j] - base;
1735     }
1736   }
1737   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1738   ierr = PetscFree(lens);CHKERRQ(ierr);
1739   ierr = PetscFree(owner);CHKERRQ(ierr);
1740   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1741 
1742   /* actually zap the local rows */
1743   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1744   PLogObjectParent(A,istmp);
1745 
1746   /*
1747         Zero the required rows. If the "diagonal block" of the matrix
1748      is square and the user wishes to set the diagonal we use seperate
1749      code so that MatSetValues() is not called for each diagonal allocating
1750      new memory, thus calling lots of mallocs and slowing things down.
1751 
1752        Contributed by: Mathew Knepley
1753   */
1754   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1755   ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr);
1756   if (diag && (l->A->M == l->A->N)) {
1757     ierr      = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1758   } else if (diag) {
1759     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1760     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1761       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1762 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1763     }
1764     for (i = 0; i < slen; i++) {
1765       row  = lrows[i] + rstart_bs;
1766       ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1767     }
1768     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1769     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1770   } else {
1771     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1772   }
1773 
1774   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1775   ierr = PetscFree(lrows);CHKERRQ(ierr);
1776 
1777   /* wait on sends */
1778   if (nsends) {
1779     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1780     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1781     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1782   }
1783   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1784   ierr = PetscFree(svalues);CHKERRQ(ierr);
1785 
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 #undef __FUNC__
1790 #define  __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1791 int MatPrintHelp_MPIBAIJ(Mat A)
1792 {
1793   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1794   MPI_Comm    comm = A->comm;
1795   static int  called = 0;
1796   int         ierr;
1797 
1798   PetscFunctionBegin;
1799   if (!a->rank) {
1800     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1801   }
1802   if (called) {PetscFunctionReturn(0);} else called = 1;
1803   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1804   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNC__
1809 #define  __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1810 int MatSetUnfactored_MPIBAIJ(Mat A)
1811 {
1812   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1813   int         ierr;
1814 
1815   PetscFunctionBegin;
1816   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1821 
1822 #undef __FUNC__
1823 #define  __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
1824 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1825 {
1826   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1827   Mat         a,b,c,d;
1828   PetscTruth  flg;
1829   int         ierr;
1830 
1831   PetscFunctionBegin;
1832   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1833   a = matA->A; b = matA->B;
1834   c = matB->A; d = matB->B;
1835 
1836   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1837   if (flg == PETSC_TRUE) {
1838     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1839   }
1840   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 /* -------------------------------------------------------------------*/
1845 static struct _MatOps MatOps_Values = {
1846   MatSetValues_MPIBAIJ,
1847   MatGetRow_MPIBAIJ,
1848   MatRestoreRow_MPIBAIJ,
1849   MatMult_MPIBAIJ,
1850   MatMultAdd_MPIBAIJ,
1851   MatMultTranspose_MPIBAIJ,
1852   MatMultTransposeAdd_MPIBAIJ,
1853   0,
1854   0,
1855   0,
1856   0,
1857   0,
1858   0,
1859   0,
1860   MatTranspose_MPIBAIJ,
1861   MatGetInfo_MPIBAIJ,
1862   MatEqual_MPIBAIJ,
1863   MatGetDiagonal_MPIBAIJ,
1864   MatDiagonalScale_MPIBAIJ,
1865   MatNorm_MPIBAIJ,
1866   MatAssemblyBegin_MPIBAIJ,
1867   MatAssemblyEnd_MPIBAIJ,
1868   0,
1869   MatSetOption_MPIBAIJ,
1870   MatZeroEntries_MPIBAIJ,
1871   MatZeroRows_MPIBAIJ,
1872   0,
1873   0,
1874   0,
1875   0,
1876   MatGetSize_MPIBAIJ,
1877   MatGetLocalSize_MPIBAIJ,
1878   MatGetOwnershipRange_MPIBAIJ,
1879   0,
1880   0,
1881   0,
1882   0,
1883   MatDuplicate_MPIBAIJ,
1884   0,
1885   0,
1886   0,
1887   0,
1888   0,
1889   MatGetSubMatrices_MPIBAIJ,
1890   MatIncreaseOverlap_MPIBAIJ,
1891   MatGetValues_MPIBAIJ,
1892   0,
1893   MatPrintHelp_MPIBAIJ,
1894   MatScale_MPIBAIJ,
1895   0,
1896   0,
1897   0,
1898   MatGetBlockSize_MPIBAIJ,
1899   0,
1900   0,
1901   0,
1902   0,
1903   0,
1904   0,
1905   MatSetUnfactored_MPIBAIJ,
1906   0,
1907   MatSetValuesBlocked_MPIBAIJ,
1908   0,
1909   0,
1910   0,
1911   MatGetMaps_Petsc};
1912 
1913 
1914 EXTERN_C_BEGIN
1915 #undef __FUNC__
1916 #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
1917 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1918 {
1919   PetscFunctionBegin;
1920   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1921   *iscopy = PETSC_FALSE;
1922   PetscFunctionReturn(0);
1923 }
1924 EXTERN_C_END
1925 
1926 #undef __FUNC__
1927 #define  __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
1928 /*@C
1929    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1930    (block compressed row).  For good matrix assembly performance
1931    the user should preallocate the matrix storage by setting the parameters
1932    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1933    performance can be increased by more than a factor of 50.
1934 
1935    Collective on MPI_Comm
1936 
1937    Input Parameters:
1938 +  comm - MPI communicator
1939 .  bs   - size of blockk
1940 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1941            This value should be the same as the local size used in creating the
1942            y vector for the matrix-vector product y = Ax.
1943 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1944            This value should be the same as the local size used in creating the
1945            x vector for the matrix-vector product y = Ax.
1946 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1947 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1948 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1949            submatrix  (same for all local rows)
1950 .  d_nnz - array containing the number of block nonzeros in the various block rows
1951            of the in diagonal portion of the local (possibly different for each block
1952            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1953 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1954            submatrix (same for all local rows).
1955 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1956            off-diagonal portion of the local submatrix (possibly different for
1957            each block row) or PETSC_NULL.
1958 
1959    Output Parameter:
1960 .  A - the matrix
1961 
1962    Options Database Keys:
1963 .   -mat_no_unroll - uses code that does not unroll the loops in the
1964                      block calculations (much slower)
1965 .   -mat_block_size - size of the blocks to use
1966 .   -mat_mpi - use the parallel matrix data structures even on one processor
1967                (defaults to using SeqBAIJ format on one processor)
1968 
1969    Notes:
1970    The user MUST specify either the local or global matrix dimensions
1971    (possibly both).
1972 
1973    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1974    than it must be used on all processors that share the object for that argument.
1975 
1976    Storage Information:
1977    For a square global matrix we define each processor's diagonal portion
1978    to be its local rows and the corresponding columns (a square submatrix);
1979    each processor's off-diagonal portion encompasses the remainder of the
1980    local matrix (a rectangular submatrix).
1981 
1982    The user can specify preallocated storage for the diagonal part of
1983    the local submatrix with either d_nz or d_nnz (not both).  Set
1984    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1985    memory allocation.  Likewise, specify preallocated storage for the
1986    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1987 
1988    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1989    the figure below we depict these three local rows and all columns (0-11).
1990 
1991 .vb
1992            0 1 2 3 4 5 6 7 8 9 10 11
1993           -------------------
1994    row 3  |  o o o d d d o o o o o o
1995    row 4  |  o o o d d d o o o o o o
1996    row 5  |  o o o d d d o o o o o o
1997           -------------------
1998 .ve
1999 
2000    Thus, any entries in the d locations are stored in the d (diagonal)
2001    submatrix, and any entries in the o locations are stored in the
2002    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2003    stored simply in the MATSEQBAIJ format for compressed row storage.
2004 
2005    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2006    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2007    In general, for PDE problems in which most nonzeros are near the diagonal,
2008    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2009    or you will get TERRIBLE performance; see the users' manual chapter on
2010    matrices.
2011 
2012    Level: intermediate
2013 
2014 .keywords: matrix, block, aij, compressed row, sparse, parallel
2015 
2016 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2017 @*/
2018 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2019 {
2020   Mat          B;
2021   Mat_MPIBAIJ  *b;
2022   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2023   PetscTruth   flag1,flag2,flg;
2024 
2025   PetscFunctionBegin;
2026   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2027 
2028   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2029   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
2030   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
2031   if (d_nnz) {
2032     for (i=0; i<m/bs; i++) {
2033       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2034     }
2035   }
2036   if (o_nnz) {
2037     for (i=0; i<m/bs; i++) {
2038       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2039     }
2040   }
2041 
2042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2043   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2044   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2045   if (!flag1 && !flag2 && size == 1) {
2046     if (M == PETSC_DECIDE) M = m;
2047     if (N == PETSC_DECIDE) N = n;
2048     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
2049     PetscFunctionReturn(0);
2050   }
2051 
2052   *A = 0;
2053   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
2054   PLogObjectCreate(B);
2055   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2056   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2057   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2058 
2059   B->ops->destroy    = MatDestroy_MPIBAIJ;
2060   B->ops->view       = MatView_MPIBAIJ;
2061   B->mapping    = 0;
2062   B->factor     = 0;
2063   B->assembled  = PETSC_FALSE;
2064 
2065   B->insertmode = NOT_SET_VALUES;
2066   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2067   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2068 
2069   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2070     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2071   }
2072   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2073     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2074   }
2075   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
2076     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2077   }
2078   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2079   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2080 
2081   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2082     work[0] = m; work[1] = n;
2083     mbs = m/bs; nbs = n/bs;
2084     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2085     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2086     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2087   }
2088   if (m == PETSC_DECIDE) {
2089     Mbs = M/bs;
2090     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2091     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2092     m   = mbs*bs;
2093   }
2094   if (n == PETSC_DECIDE) {
2095     Nbs = N/bs;
2096     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
2097     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2098     n   = nbs*bs;
2099   }
2100   if (mbs*bs != m || nbs*bs != n) {
2101     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2102   }
2103 
2104   b->m = m; B->m = m;
2105   b->n = n; B->n = n;
2106   b->N = N; B->N = N;
2107   b->M = M; B->M = M;
2108   b->bs  = bs;
2109   b->bs2 = bs*bs;
2110   b->mbs = mbs;
2111   b->nbs = nbs;
2112   b->Mbs = Mbs;
2113   b->Nbs = Nbs;
2114 
2115   /* the information in the maps duplicates the information computed below, eventually
2116      we should remove the duplicate information that is not contained in the maps */
2117   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2118   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2119 
2120   /* build local table of row and column ownerships */
2121   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2122   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2123   b->cowners    = b->rowners + b->size + 2;
2124   b->rowners_bs = b->cowners + b->size + 2;
2125   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2126   b->rowners[0]    = 0;
2127   for (i=2; i<=b->size; i++) {
2128     b->rowners[i] += b->rowners[i-1];
2129   }
2130   for (i=0; i<=b->size; i++) {
2131     b->rowners_bs[i] = b->rowners[i]*bs;
2132   }
2133   b->rstart    = b->rowners[b->rank];
2134   b->rend      = b->rowners[b->rank+1];
2135   b->rstart_bs = b->rstart * bs;
2136   b->rend_bs   = b->rend * bs;
2137 
2138   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2139   b->cowners[0] = 0;
2140   for (i=2; i<=b->size; i++) {
2141     b->cowners[i] += b->cowners[i-1];
2142   }
2143   b->cstart    = b->cowners[b->rank];
2144   b->cend      = b->cowners[b->rank+1];
2145   b->cstart_bs = b->cstart * bs;
2146   b->cend_bs   = b->cend * bs;
2147 
2148 
2149   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2150   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2151   PLogObjectParent(B,b->A);
2152   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2153   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2154   PLogObjectParent(B,b->B);
2155 
2156   /* build cache for off array entries formed */
2157   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
2158   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
2159   b->donotstash  = PETSC_FALSE;
2160   b->colmap      = PETSC_NULL;
2161   b->garray      = PETSC_NULL;
2162   b->roworiented = PETSC_TRUE;
2163 
2164   /* stuff used in block assembly */
2165   b->barray       = 0;
2166 
2167   /* stuff used for matrix vector multiply */
2168   b->lvec         = 0;
2169   b->Mvctx        = 0;
2170 
2171   /* stuff for MatGetRow() */
2172   b->rowindices   = 0;
2173   b->rowvalues    = 0;
2174   b->getrowactive = PETSC_FALSE;
2175 
2176   /* hash table stuff */
2177   b->ht           = 0;
2178   b->hd           = 0;
2179   b->ht_size      = 0;
2180   b->ht_flag      = PETSC_FALSE;
2181   b->ht_fact      = 0;
2182   b->ht_total_ct  = 0;
2183   b->ht_insert_ct = 0;
2184 
2185   *A = B;
2186   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2187   if (flg) {
2188     double fact = 1.39;
2189     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2190     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2191     if (fact <= 1.0) fact = 1.39;
2192     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2193     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2194   }
2195   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2196                                      "MatStoreValues_MPIBAIJ",
2197                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2198   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2199                                      "MatRetrieveValues_MPIBAIJ",
2200                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2201   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2202                                      "MatGetDiagonalBlock_MPIBAIJ",
2203                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 #undef __FUNC__
2208 #define  __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
2209 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2210 {
2211   Mat         mat;
2212   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2213   int         ierr,len=0;
2214   PetscTruth  flg;
2215 
2216   PetscFunctionBegin;
2217   *newmat       = 0;
2218   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2219   PLogObjectCreate(mat);
2220   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2221   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2222   mat->ops->destroy = MatDestroy_MPIBAIJ;
2223   mat->ops->view    = MatView_MPIBAIJ;
2224   mat->factor       = matin->factor;
2225   mat->assembled    = PETSC_TRUE;
2226   mat->insertmode   = NOT_SET_VALUES;
2227 
2228   a->m = mat->m   = oldmat->m;
2229   a->n = mat->n   = oldmat->n;
2230   a->M = mat->M   = oldmat->M;
2231   a->N = mat->N   = oldmat->N;
2232 
2233   a->bs  = oldmat->bs;
2234   a->bs2 = oldmat->bs2;
2235   a->mbs = oldmat->mbs;
2236   a->nbs = oldmat->nbs;
2237   a->Mbs = oldmat->Mbs;
2238   a->Nbs = oldmat->Nbs;
2239 
2240   a->rstart       = oldmat->rstart;
2241   a->rend         = oldmat->rend;
2242   a->cstart       = oldmat->cstart;
2243   a->cend         = oldmat->cend;
2244   a->size         = oldmat->size;
2245   a->rank         = oldmat->rank;
2246   a->donotstash   = oldmat->donotstash;
2247   a->roworiented  = oldmat->roworiented;
2248   a->rowindices   = 0;
2249   a->rowvalues    = 0;
2250   a->getrowactive = PETSC_FALSE;
2251   a->barray       = 0;
2252   a->rstart_bs    = oldmat->rstart_bs;
2253   a->rend_bs      = oldmat->rend_bs;
2254   a->cstart_bs    = oldmat->cstart_bs;
2255   a->cend_bs      = oldmat->cend_bs;
2256 
2257   /* hash table stuff */
2258   a->ht           = 0;
2259   a->hd           = 0;
2260   a->ht_size      = 0;
2261   a->ht_flag      = oldmat->ht_flag;
2262   a->ht_fact      = oldmat->ht_fact;
2263   a->ht_total_ct  = 0;
2264   a->ht_insert_ct = 0;
2265 
2266 
2267   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2268   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2269   a->cowners    = a->rowners + a->size + 2;
2270   a->rowners_bs = a->cowners + a->size + 2;
2271   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2272   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2273   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2274   if (oldmat->colmap) {
2275 #if defined (PETSC_USE_CTABLE)
2276   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2277 #else
2278     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2279     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2280     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2281 #endif
2282   } else a->colmap = 0;
2283   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2284     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2285     PLogObjectMemory(mat,len*sizeof(int));
2286     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2287   } else a->garray = 0;
2288 
2289   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2290   PLogObjectParent(mat,a->lvec);
2291   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2292 
2293   PLogObjectParent(mat,a->Mvctx);
2294   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2295   PLogObjectParent(mat,a->A);
2296   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2297   PLogObjectParent(mat,a->B);
2298   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2299   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2300   if (flg) {
2301     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2302   }
2303   *newmat = mat;
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #include "sys.h"
2308 
2309 #undef __FUNC__
2310 #define  __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
2311 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2312 {
2313   Mat          A;
2314   int          i,nz,ierr,j,rstart,rend,fd;
2315   Scalar       *vals,*buf;
2316   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2317   MPI_Status   status;
2318   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2319   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2320   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2321   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2322   int          dcount,kmax,k,nzcount,tmp;
2323 
2324   PetscFunctionBegin;
2325   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2326 
2327   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2328   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2329   if (!rank) {
2330     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2331     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2332     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2333     if (header[3] < 0) {
2334       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2335     }
2336   }
2337 
2338   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2339   M = header[1]; N = header[2];
2340 
2341   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2342 
2343   /*
2344      This code adds extra rows to make sure the number of rows is
2345      divisible by the blocksize
2346   */
2347   Mbs        = M/bs;
2348   extra_rows = bs - M + bs*(Mbs);
2349   if (extra_rows == bs) extra_rows = 0;
2350   else                  Mbs++;
2351   if (extra_rows &&!rank) {
2352     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2353   }
2354 
2355   /* determine ownership of all rows */
2356   mbs = Mbs/size + ((Mbs % size) > rank);
2357   m   = mbs * bs;
2358   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2359   browners = rowners + size + 1;
2360   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2361   rowners[0] = 0;
2362   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2363   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2364   rstart = rowners[rank];
2365   rend   = rowners[rank+1];
2366 
2367   /* distribute row lengths to all processors */
2368   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2369   if (!rank) {
2370     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2371     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2372     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2373     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2374     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2375     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2376     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2377   } else {
2378     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2379   }
2380 
2381   if (!rank) {
2382     /* calculate the number of nonzeros on each processor */
2383     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2384     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2385     for (i=0; i<size; i++) {
2386       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2387         procsnz[i] += rowlengths[j];
2388       }
2389     }
2390     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2391 
2392     /* determine max buffer needed and allocate it */
2393     maxnz = 0;
2394     for (i=0; i<size; i++) {
2395       maxnz = PetscMax(maxnz,procsnz[i]);
2396     }
2397     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2398 
2399     /* read in my part of the matrix column indices  */
2400     nz = procsnz[0];
2401     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2402     mycols = ibuf;
2403     if (size == 1)  nz -= extra_rows;
2404     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2405     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2406 
2407     /* read in every ones (except the last) and ship off */
2408     for (i=1; i<size-1; i++) {
2409       nz   = procsnz[i];
2410       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2411       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2412     }
2413     /* read in the stuff for the last proc */
2414     if (size != 1) {
2415       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2416       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2417       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2418       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2419     }
2420     ierr = PetscFree(cols);CHKERRQ(ierr);
2421   } else {
2422     /* determine buffer space needed for message */
2423     nz = 0;
2424     for (i=0; i<m; i++) {
2425       nz += locrowlens[i];
2426     }
2427     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2428     mycols = ibuf;
2429     /* receive message of column indices*/
2430     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2431     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2432     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2433   }
2434 
2435   /* loop over local rows, determining number of off diagonal entries */
2436   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2437   odlens = dlens + (rend-rstart);
2438   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2439   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2440   masked1 = mask    + Mbs;
2441   masked2 = masked1 + Mbs;
2442   rowcount = 0; nzcount = 0;
2443   for (i=0; i<mbs; i++) {
2444     dcount  = 0;
2445     odcount = 0;
2446     for (j=0; j<bs; j++) {
2447       kmax = locrowlens[rowcount];
2448       for (k=0; k<kmax; k++) {
2449         tmp = mycols[nzcount++]/bs;
2450         if (!mask[tmp]) {
2451           mask[tmp] = 1;
2452           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2453           else masked1[dcount++] = tmp;
2454         }
2455       }
2456       rowcount++;
2457     }
2458 
2459     dlens[i]  = dcount;
2460     odlens[i] = odcount;
2461 
2462     /* zero out the mask elements we set */
2463     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2464     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2465   }
2466 
2467   /* create our matrix */
2468   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2469   A = *newmat;
2470   MatSetOption(A,MAT_COLUMNS_SORTED);
2471 
2472   if (!rank) {
2473     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2474     /* read in my part of the matrix numerical values  */
2475     nz = procsnz[0];
2476     vals = buf;
2477     mycols = ibuf;
2478     if (size == 1)  nz -= extra_rows;
2479     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2480     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2481 
2482     /* insert into matrix */
2483     jj      = rstart*bs;
2484     for (i=0; i<m; i++) {
2485       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2486       mycols += locrowlens[i];
2487       vals   += locrowlens[i];
2488       jj++;
2489     }
2490     /* read in other processors (except the last one) and ship out */
2491     for (i=1; i<size-1; i++) {
2492       nz   = procsnz[i];
2493       vals = buf;
2494       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2495       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2496     }
2497     /* the last proc */
2498     if (size != 1){
2499       nz   = procsnz[i] - extra_rows;
2500       vals = buf;
2501       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2502       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2503       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2504     }
2505     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2506   } else {
2507     /* receive numeric values */
2508     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2509 
2510     /* receive message of values*/
2511     vals   = buf;
2512     mycols = ibuf;
2513     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2514     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2515     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2516 
2517     /* insert into matrix */
2518     jj      = rstart*bs;
2519     for (i=0; i<m; i++) {
2520       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2521       mycols += locrowlens[i];
2522       vals   += locrowlens[i];
2523       jj++;
2524     }
2525   }
2526   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2527   ierr = PetscFree(buf);CHKERRQ(ierr);
2528   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2529   ierr = PetscFree(rowners);CHKERRQ(ierr);
2530   ierr = PetscFree(dlens);CHKERRQ(ierr);
2531   ierr = PetscFree(mask);CHKERRQ(ierr);
2532   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2533   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 #undef __FUNC__
2538 #define  __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2539 /*@
2540    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2541 
2542    Input Parameters:
2543 .  mat  - the matrix
2544 .  fact - factor
2545 
2546    Collective on Mat
2547 
2548    Level: advanced
2549 
2550   Notes:
2551    This can also be set by the command line option: -mat_use_hash_table fact
2552 
2553 .keywords: matrix, hashtable, factor, HT
2554 
2555 .seealso: MatSetOption()
2556 @*/
2557 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2558 {
2559   Mat_MPIBAIJ *baij;
2560 
2561   PetscFunctionBegin;
2562   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2563   if (mat->type != MATMPIBAIJ) {
2564     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2565   }
2566   baij = (Mat_MPIBAIJ*)mat->data;
2567   baij->ht_fact = fact;
2568   PetscFunctionReturn(0);
2569 }
2570