xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b2863d3a52358f7ee22ca3a1f84f250141973e94)
1 /*$Id: mpibaij.c,v 1.188 2000/04/09 03:09:57 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 extern int MatSetUpMultiply_MPIBAIJ(Mat);
7 extern int DisAssemble_MPIBAIJ(Mat);
8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 extern int MatPrintHelp_SeqBAIJ(Mat);
16 
17 EXTERN_C_BEGIN
18 #undef __FUNC__
19 #define  __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ"
20 int MatStoreValues_MPIBAIJ(Mat mat)
21 {
22   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
23   int         ierr;
24 
25   PetscFunctionBegin;
26   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
27   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
28   PetscFunctionReturn(0);
29 }
30 EXTERN_C_END
31 
32 EXTERN_C_BEGIN
33 #undef __FUNC__
34 #define  __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ"
35 int MatRetrieveValues_MPIBAIJ(Mat mat)
36 {
37   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
38   int         ierr;
39 
40   PetscFunctionBegin;
41   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
42   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
43   PetscFunctionReturn(0);
44 }
45 EXTERN_C_END
46 
47 /*
48      Local utility routine that creates a mapping from the global column
49    number to the local number in the off-diagonal part of the local
50    storage of the matrix.  This is done in a non scable way since the
51    length of colmap equals the global matrix length.
52 */
53 #undef __FUNC__
54 #define  __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private"
55 static int CreateColmap_MPIBAIJ_Private(Mat mat)
56 {
57   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
58   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
59   int         nbs = B->nbs,i,bs=B->bs,ierr;
60 
61   PetscFunctionBegin;
62 #if defined (PETSC_USE_CTABLE)
63   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
64   for (i=0; i<nbs; i++){
65     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
66   }
67 #else
68   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
69   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
70   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
71   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
72 #endif
73   PetscFunctionReturn(0);
74 }
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
109  \
110         /* malloc new storage space */ \
111         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
112         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
113         new_j   = (int*)(new_a + bs2*new_nz); \
114         new_i   = new_j + new_nz; \
115  \
116         /* copy over old data into new slots */ \
117         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
118         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
120         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
122         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
124         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
125                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
126         /* free up old matrix storage */ \
127         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
128         if (!a->singlemalloc) { \
129           ierr = PetscFree(a->i);CHKERRQ(ierr); \
130           ierr = PetscFree(a->j);CHKERRQ(ierr);\
131         } \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = PETSC_TRUE; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
154 
155 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
156 { \
157     brow = row/bs;  \
158     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
159     rmax = bimax[brow]; nrow = bilen[brow]; \
160       bcol = col/bs; \
161       ridx = row % bs; cidx = col % bs; \
162       low = 0; high = nrow; \
163       while (high-low > 3) { \
164         t = (low+high)/2; \
165         if (rp[t] > bcol) high = t; \
166         else              low  = t; \
167       } \
168       for (_i=low; _i<high; _i++) { \
169         if (rp[_i] > bcol) break; \
170         if (rp[_i] == bcol) { \
171           bap  = ap +  bs2*_i + bs*cidx + ridx; \
172           if (addv == ADD_VALUES) *bap += value;  \
173           else                    *bap  = value;  \
174           goto b_noinsert; \
175         } \
176       } \
177       if (b->nonew == 1) goto b_noinsert; \
178       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
179       if (nrow >= rmax) { \
180         /* there is no extra room in row, therefore enlarge */ \
181         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
182         MatScalar *new_a; \
183  \
184         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
185  \
186         /* malloc new storage space */ \
187         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
188         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
189         new_j   = (int*)(new_a + bs2*new_nz); \
190         new_i   = new_j + new_nz; \
191  \
192         /* copy over old data into new slots */ \
193         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
194         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
195         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
196         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
197         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
198         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
201                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
202         /* free up old matrix storage */ \
203         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
204         if (!b->singlemalloc) { \
205           ierr = PetscFree(b->i);CHKERRQ(ierr); \
206           ierr = PetscFree(b->j);CHKERRQ(ierr); \
207         } \
208         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
209         b->singlemalloc = PETSC_TRUE; \
210  \
211         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
212         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
213         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
214         b->maxnz += bs2*CHUNKSIZE; \
215         b->reallocs++; \
216         b->nz++; \
217       } \
218       N = nrow++ - 1;  \
219       /* shift up all the later entries in this row */ \
220       for (ii=N; ii>=_i; ii--) { \
221         rp[ii+1] = rp[ii]; \
222         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223       } \
224       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
225       rp[_i]                      = bcol;  \
226       ap[bs2*_i + bs*cidx + ridx] = value;  \
227       b_noinsert:; \
228     bilen[brow] = nrow; \
229 }
230 
231 #undef __FUNC__
232 #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
233 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
234 {
235   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
236   Scalar      value;
237   int         ierr,i,j,row,col;
238   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
239   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
240   int         cend_orig=baij->cend_bs,bs=baij->bs;
241 
242   /* Some Variables required in the macro */
243   Mat         A = baij->A;
244   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
245   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
246   MatScalar   *aa=a->a;
247 
248   Mat         B = baij->B;
249   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
250   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
251   MatScalar   *ba=b->a;
252 
253   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
254   int         low,high,t,ridx,cidx,bs2=a->bs2;
255   MatScalar   *ap,*bap;
256 
257   PetscFunctionBegin;
258   for (i=0; i<m; i++) {
259     if (im[i] < 0) continue;
260 #if defined(PETSC_USE_BOPT_g)
261     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
262 #endif
263     if (im[i] >= rstart_orig && im[i] < rend_orig) {
264       row = im[i] - rstart_orig;
265       for (j=0; j<n; j++) {
266         if (in[j] >= cstart_orig && in[j] < cend_orig){
267           col = in[j] - cstart_orig;
268           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
269           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
270           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
271         } else if (in[j] < 0) continue;
272 #if defined(PETSC_USE_BOPT_g)
273         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
274 #endif
275         else {
276           if (mat->was_assembled) {
277             if (!baij->colmap) {
278               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
279             }
280 #if defined (PETSC_USE_CTABLE)
281             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
282             col  = col - 1 + in[j]%bs;
283 #else
284             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
285 #endif
286             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
287               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
288               col =  in[j];
289               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
290               B = baij->B;
291               b = (Mat_SeqBAIJ*)(B)->data;
292               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
293               ba=b->a;
294             }
295           } else col = in[j];
296           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
297           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
298           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
299         }
300       }
301     } else {
302       if (!baij->donotstash) {
303         if (roworiented) {
304           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
305         } else {
306           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
307         }
308       }
309     }
310   }
311   PetscFunctionReturn(0);
312 }
313 
314 #undef __FUNC__
315 #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
316 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
317 {
318   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
319   Scalar      *value,*barray=baij->barray;
320   int         ierr,i,j,ii,jj,row,col;
321   int         roworiented = baij->roworiented,rstart=baij->rstart ;
322   int         rend=baij->rend,cstart=baij->cstart,stepval;
323   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
324 
325   PetscFunctionBegin;
326   if(!barray) {
327     baij->barray = barray = (Scalar*)PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
328   }
329 
330   if (roworiented) {
331     stepval = (n-1)*bs;
332   } else {
333     stepval = (m-1)*bs;
334   }
335   for (i=0; i<m; i++) {
336     if (im[i] < 0) continue;
337 #if defined(PETSC_USE_BOPT_g)
338     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
339 #endif
340     if (im[i] >= rstart && im[i] < rend) {
341       row = im[i] - rstart;
342       for (j=0; j<n; j++) {
343         /* If NumCol = 1 then a copy is not required */
344         if ((roworiented) && (n == 1)) {
345           barray = v + i*bs2;
346         } else if((!roworiented) && (m == 1)) {
347           barray = v + j*bs2;
348         } else { /* Here a copy is required */
349           if (roworiented) {
350             value = v + i*(stepval+bs)*bs + j*bs;
351           } else {
352             value = v + j*(stepval+bs)*bs + i*bs;
353           }
354           for (ii=0; ii<bs; ii++,value+=stepval) {
355             for (jj=0; jj<bs; jj++) {
356               *barray++  = *value++;
357             }
358           }
359           barray -=bs2;
360         }
361 
362         if (in[j] >= cstart && in[j] < cend){
363           col  = in[j] - cstart;
364           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
365         }
366         else if (in[j] < 0) continue;
367 #if defined(PETSC_USE_BOPT_g)
368         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
369 #endif
370         else {
371           if (mat->was_assembled) {
372             if (!baij->colmap) {
373               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
374             }
375 
376 #if defined(PETSC_USE_BOPT_g)
377 #if defined (PETSC_USE_CTABLE)
378             { int data;
379               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
380               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
381             }
382 #else
383             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
384 #endif
385 #endif
386 #if defined (PETSC_USE_CTABLE)
387 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
388             col  = (col - 1)/bs;
389 #else
390             col = (baij->colmap[in[j]] - 1)/bs;
391 #endif
392             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
393               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
394               col =  in[j];
395             }
396           }
397           else col = in[j];
398           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
399         }
400       }
401     } else {
402       if (!baij->donotstash) {
403         if (roworiented) {
404           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
405         } else {
406           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
407         }
408       }
409     }
410   }
411   PetscFunctionReturn(0);
412 }
413 #define HASH_KEY 0.6180339887
414 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
415 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
416 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
417 #undef __FUNC__
418 #define  __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT"
419 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
420 {
421   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
422   int         ierr,i,j,row,col;
423   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
424   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
425   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
426   PetscReal   tmp;
427   MatScalar   ** HD = baij->hd,value;
428 #if defined(PETSC_USE_BOPT_g)
429   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
430 #endif
431 
432   PetscFunctionBegin;
433 
434   for (i=0; i<m; i++) {
435 #if defined(PETSC_USE_BOPT_g)
436     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
437     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
438 #endif
439       row = im[i];
440     if (row >= rstart_orig && row < rend_orig) {
441       for (j=0; j<n; j++) {
442         col = in[j];
443         if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m];
444         /* Look up into the Hash Table */
445         key = (row/bs)*Nbs+(col/bs)+1;
446         h1  = HASH(size,key,tmp);
447 
448 
449         idx = h1;
450 #if defined(PETSC_USE_BOPT_g)
451         insert_ct++;
452         total_ct++;
453         if (HT[idx] != key) {
454           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
455           if (idx == size) {
456             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
457             if (idx == h1) {
458               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
459             }
460           }
461         }
462 #else
463         if (HT[idx] != key) {
464           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
465           if (idx == size) {
466             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
467             if (idx == h1) {
468               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
469             }
470           }
471         }
472 #endif
473         /* A HASH table entry is found, so insert the values at the correct address */
474         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
475         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
476       }
477     } else {
478       if (!baij->donotstash) {
479         if (roworiented) {
480           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
481         } else {
482           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
483         }
484       }
485     }
486   }
487 #if defined(PETSC_USE_BOPT_g)
488   baij->ht_total_ct = total_ct;
489   baij->ht_insert_ct = insert_ct;
490 #endif
491   PetscFunctionReturn(0);
492 }
493 
494 #undef __FUNC__
495 #define  __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
496 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
497 {
498   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
499   int         ierr,i,j,ii,jj,row,col;
500   int         roworiented = baij->roworiented,rstart=baij->rstart ;
501   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
502   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
503   PetscReal   tmp;
504   MatScalar   **HD = baij->hd,*baij_a;
505   Scalar      *v_t,*value;
506 #if defined(PETSC_USE_BOPT_g)
507   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
508 #endif
509 
510   PetscFunctionBegin;
511 
512   if (roworiented) {
513     stepval = (n-1)*bs;
514   } else {
515     stepval = (m-1)*bs;
516   }
517   for (i=0; i<m; i++) {
518 #if defined(PETSC_USE_BOPT_g)
519     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
520     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
521 #endif
522     row   = im[i];
523     v_t   = v + i*bs2;
524     if (row >= rstart && row < rend) {
525       for (j=0; j<n; j++) {
526         col = in[j];
527 
528         /* Look up into the Hash Table */
529         key = row*Nbs+col+1;
530         h1  = HASH(size,key,tmp);
531 
532         idx = h1;
533 #if defined(PETSC_USE_BOPT_g)
534         total_ct++;
535         insert_ct++;
536        if (HT[idx] != key) {
537           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
538           if (idx == size) {
539             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
540             if (idx == h1) {
541               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
542             }
543           }
544         }
545 #else
546         if (HT[idx] != key) {
547           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
548           if (idx == size) {
549             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
550             if (idx == h1) {
551               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
552             }
553           }
554         }
555 #endif
556         baij_a = HD[idx];
557         if (roworiented) {
558           /*value = v + i*(stepval+bs)*bs + j*bs;*/
559           /* value = v + (i*(stepval+bs)+j)*bs; */
560           value = v_t;
561           v_t  += bs;
562           if (addv == ADD_VALUES) {
563             for (ii=0; ii<bs; ii++,value+=stepval) {
564               for (jj=ii; jj<bs2; jj+=bs) {
565                 baij_a[jj]  += *value++;
566               }
567             }
568           } else {
569             for (ii=0; ii<bs; ii++,value+=stepval) {
570               for (jj=ii; jj<bs2; jj+=bs) {
571                 baij_a[jj]  = *value++;
572               }
573             }
574           }
575         } else {
576           value = v + j*(stepval+bs)*bs + i*bs;
577           if (addv == ADD_VALUES) {
578             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
579               for (jj=0; jj<bs; jj++) {
580                 baij_a[jj]  += *value++;
581               }
582             }
583           } else {
584             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
585               for (jj=0; jj<bs; jj++) {
586                 baij_a[jj]  = *value++;
587               }
588             }
589           }
590         }
591       }
592     } else {
593       if (!baij->donotstash) {
594         if (roworiented) {
595           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
596         } else {
597           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
598         }
599       }
600     }
601   }
602 #if defined(PETSC_USE_BOPT_g)
603   baij->ht_total_ct = total_ct;
604   baij->ht_insert_ct = insert_ct;
605 #endif
606   PetscFunctionReturn(0);
607 }
608 
609 #undef __FUNC__
610 #define  __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
611 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
612 {
613   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
614   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
615   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
616 
617   PetscFunctionBegin;
618   for (i=0; i<m; i++) {
619     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
620     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
621     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
622       row = idxm[i] - bsrstart;
623       for (j=0; j<n; j++) {
624         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
625         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
626         if (idxn[j] >= bscstart && idxn[j] < bscend){
627           col = idxn[j] - bscstart;
628           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
629         } else {
630           if (!baij->colmap) {
631             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
632           }
633 #if defined (PETSC_USE_CTABLE)
634           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
635           data --;
636 #else
637           data = baij->colmap[idxn[j]/bs]-1;
638 #endif
639           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
640           else {
641             col  = data + idxn[j]%bs;
642             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
643           }
644         }
645       }
646     } else {
647       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
648     }
649   }
650  PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNC__
654 #define  __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
655 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
656 {
657   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
658   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
659   int        ierr,i,bs2=baij->bs2;
660   PetscReal  sum = 0.0;
661   MatScalar  *v;
662 
663   PetscFunctionBegin;
664   if (baij->size == 1) {
665     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
666   } else {
667     if (type == NORM_FROBENIUS) {
668       v = amat->a;
669       for (i=0; i<amat->nz*bs2; i++) {
670 #if defined(PETSC_USE_COMPLEX)
671         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
672 #else
673         sum += (*v)*(*v); v++;
674 #endif
675       }
676       v = bmat->a;
677       for (i=0; i<bmat->nz*bs2; i++) {
678 #if defined(PETSC_USE_COMPLEX)
679         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
680 #else
681         sum += (*v)*(*v); v++;
682 #endif
683       }
684       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
685       *norm = sqrt(*norm);
686     } else {
687       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
688     }
689   }
690   PetscFunctionReturn(0);
691 }
692 
693 
694 /*
695   Creates the hash table, and sets the table
696   This table is created only once.
697   If new entried need to be added to the matrix
698   then the hash table has to be destroyed and
699   recreated.
700 */
701 #undef __FUNC__
702 #define  __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
703 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
704 {
705   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
706   Mat         A = baij->A,B=baij->B;
707   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
708   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
709   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
710   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
711   int         *HT,key;
712   MatScalar   **HD;
713   PetscReal   tmp;
714 #if defined(PETSC_USE_BOPT_g)
715   int         ct=0,max=0;
716 #endif
717 
718   PetscFunctionBegin;
719   baij->ht_size=(int)(factor*nz);
720   size = baij->ht_size;
721 
722   if (baij->ht) {
723     PetscFunctionReturn(0);
724   }
725 
726   /* Allocate Memory for Hash Table */
727   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
728   baij->ht = (int*)(baij->hd + size);
729   HD = baij->hd;
730   HT = baij->ht;
731 
732 
733   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
734 
735 
736   /* Loop Over A */
737   for (i=0; i<a->mbs; i++) {
738     for (j=ai[i]; j<ai[i+1]; j++) {
739       row = i+rstart;
740       col = aj[j]+cstart;
741 
742       key = row*Nbs + col + 1;
743       h1  = HASH(size,key,tmp);
744       for (k=0; k<size; k++){
745         if (HT[(h1+k)%size] == 0.0) {
746           HT[(h1+k)%size] = key;
747           HD[(h1+k)%size] = a->a + j*bs2;
748           break;
749 #if defined(PETSC_USE_BOPT_g)
750         } else {
751           ct++;
752 #endif
753         }
754       }
755 #if defined(PETSC_USE_BOPT_g)
756       if (k> max) max = k;
757 #endif
758     }
759   }
760   /* Loop Over B */
761   for (i=0; i<b->mbs; i++) {
762     for (j=bi[i]; j<bi[i+1]; j++) {
763       row = i+rstart;
764       col = garray[bj[j]];
765       key = row*Nbs + col + 1;
766       h1  = HASH(size,key,tmp);
767       for (k=0; k<size; k++){
768         if (HT[(h1+k)%size] == 0.0) {
769           HT[(h1+k)%size] = key;
770           HD[(h1+k)%size] = b->a + j*bs2;
771           break;
772 #if defined(PETSC_USE_BOPT_g)
773         } else {
774           ct++;
775 #endif
776         }
777       }
778 #if defined(PETSC_USE_BOPT_g)
779       if (k> max) max = k;
780 #endif
781     }
782   }
783 
784   /* Print Summary */
785 #if defined(PETSC_USE_BOPT_g)
786   for (i=0,j=0; i<size; i++) {
787     if (HT[i]) {j++;}
788   }
789   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
790 #endif
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNC__
795 #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
796 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
797 {
798   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
799   int         ierr,nstash,reallocs;
800   InsertMode  addv;
801 
802   PetscFunctionBegin;
803   if (baij->donotstash) {
804     PetscFunctionReturn(0);
805   }
806 
807   /* make sure all processors are either in INSERTMODE or ADDMODE */
808   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
809   if (addv == (ADD_VALUES|INSERT_VALUES)) {
810     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
811   }
812   mat->insertmode = addv; /* in case this processor had no cache */
813 
814   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
815   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
816   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
817   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
818   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
819   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNC__
824 #define  __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
825 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
828   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
829   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
830   int         *row,*col,other_disassembled;
831   PetscTruth  r1,r2,r3;
832   MatScalar   *val;
833   InsertMode  addv = mat->insertmode;
834 
835   PetscFunctionBegin;
836   if (!baij->donotstash) {
837     while (1) {
838       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
839       if (!flg) break;
840 
841       for (i=0; i<n;) {
842         /* Now identify the consecutive vals belonging to the same row */
843         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
844         if (j < n) ncols = j-i;
845         else       ncols = n-i;
846         /* Now assemble all these values with a single function call */
847         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
848         i = j;
849       }
850     }
851     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
852     /* Now process the block-stash. Since the values are stashed column-oriented,
853        set the roworiented flag to column oriented, and after MatSetValues()
854        restore the original flags */
855     r1 = baij->roworiented;
856     r2 = a->roworiented;
857     r3 = b->roworiented;
858     baij->roworiented = PETSC_FALSE;
859     a->roworiented    = PETSC_FALSE;
860     b->roworiented    = PETSC_FALSE;
861     while (1) {
862       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
863       if (!flg) break;
864 
865       for (i=0; i<n;) {
866         /* Now identify the consecutive vals belonging to the same row */
867         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
868         if (j < n) ncols = j-i;
869         else       ncols = n-i;
870         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
871         i = j;
872       }
873     }
874     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
875     baij->roworiented = r1;
876     a->roworiented    = r2;
877     b->roworiented    = r3;
878   }
879 
880   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
881   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
882 
883   /* determine if any processor has disassembled, if so we must
884      also disassemble ourselfs, in order that we may reassemble. */
885   /*
886      if nonzero structure of submatrix B cannot change then we know that
887      no processor disassembled thus we can skip this stuff
888   */
889   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
890     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
891     if (mat->was_assembled && !other_disassembled) {
892       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
893     }
894   }
895 
896   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
897     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
898   }
899   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
900   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
901 
902 #if defined(PETSC_USE_BOPT_g)
903   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
904     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
905     baij->ht_total_ct  = 0;
906     baij->ht_insert_ct = 0;
907   }
908 #endif
909   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
910     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
911     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
912     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
913   }
914 
915   if (baij->rowvalues) {
916     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
917     baij->rowvalues = 0;
918   }
919   PetscFunctionReturn(0);
920 }
921 
922 /*
923 #undef __FUNC__
924 #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_Binary"
925 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
926 {
927   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
928   int          ierr;
929 
930   PetscFunctionBegin;
931   if (baij->size == 1) {
932     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
933   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
934   PetscFunctionReturn(0);
935 }
936 */
937 
938 #undef __FUNC__
939 #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
940 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
941 {
942   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
943   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
944   PetscTruth   isascii,isdraw;
945 
946   PetscFunctionBegin;
947   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
948   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
949   if (isascii) {
950     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
951     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
952       MatInfo info;
953       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
954       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
955       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
956               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
957               baij->bs,(int)info.memory);CHKERRQ(ierr);
958       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
960       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
961       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
962       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
963       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
964       PetscFunctionReturn(0);
965     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
966       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
967       PetscFunctionReturn(0);
968     }
969   }
970 
971   if (isdraw) {
972     Draw       draw;
973     PetscTruth isnull;
974     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
975     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
976   }
977 
978   if (size == 1) {
979     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
980   } else {
981     /* assemble the entire matrix onto first processor. */
982     Mat         A;
983     Mat_SeqBAIJ *Aloc;
984     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
985     MatScalar   *a;
986 #if defined(PETSC_USE_MAT_SINGLE)
987     Scalar      *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar));
988     int         l;
989 #endif
990 
991     if (!rank) {
992       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
993     } else {
994       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
995     }
996     PLogObjectParent(mat,A);
997 
998     /* copy over the A part */
999     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
1000     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1001     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1002 
1003     for (i=0; i<mbs; i++) {
1004       rvals[0] = bs*(baij->rstart + i);
1005       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1006       for (j=ai[i]; j<ai[i+1]; j++) {
1007         col = (baij->cstart+aj[j])*bs;
1008         for (k=0; k<bs; k++) {
1009 #if defined(PETSC_USE_MAT_SINGLE)
1010           for (l=0; l<bs; l++) atmp[l] = a[l];
1011 #else
1012           atmp = a;
1013 #endif
1014           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,atmp,INSERT_VALUES);CHKERRQ(ierr);
1015           col++; a += bs;
1016         }
1017       }
1018     }
1019     /* copy over the B part */
1020     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1021     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1022     for (i=0; i<mbs; i++) {
1023       rvals[0] = bs*(baij->rstart + i);
1024       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1025       for (j=ai[i]; j<ai[i+1]; j++) {
1026         col = baij->garray[aj[j]]*bs;
1027         for (k=0; k<bs; k++) {
1028 #if defined(PETSC_USE_MAT_SINGLE)
1029           for (l=0; l<bs; l++) atmp[l] = a[l];
1030 #else
1031           atmp = a;
1032 #endif
1033           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,atmp,INSERT_VALUES);CHKERRQ(ierr);
1034           col++; a += bs;
1035         }
1036       }
1037     }
1038 #if defined(PETSC_USE_MAT_SINGLE)
1039     ierr = PetscFree(atmp);CHKERRQ(ierr);
1040 #endif
1041     ierr = PetscFree(rvals);CHKERRQ(ierr);
1042     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1043     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1044     /*
1045        Everyone has to call to draw the matrix since the graphics waits are
1046        synchronized across all processors that share the Draw object
1047     */
1048     if (!rank) {
1049       Viewer sviewer;
1050       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1051       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1052       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1053     }
1054     ierr = MatDestroy(A);CHKERRQ(ierr);
1055   }
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 
1060 
1061 #undef __FUNC__
1062 #define  __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1063 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1064 {
1065   int        ierr;
1066   PetscTruth isascii,isdraw,issocket,isbinary;
1067 
1068   PetscFunctionBegin;
1069   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1070   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1071   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1072   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1073   if (isascii || isdraw || issocket || isbinary) {
1074     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1075   } else {
1076     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1077   }
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNC__
1082 #define  __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
1083 int MatDestroy_MPIBAIJ(Mat mat)
1084 {
1085   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1086   int         ierr;
1087 
1088   PetscFunctionBegin;
1089 
1090   if (mat->mapping) {
1091     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1092   }
1093   if (mat->bmapping) {
1094     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1095   }
1096   if (mat->rmap) {
1097     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1098   }
1099   if (mat->cmap) {
1100     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1101   }
1102 #if defined(PETSC_USE_LOG)
1103   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1104 #endif
1105 
1106   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1107   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1108 
1109   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1110   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1111   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1112 #if defined (PETSC_USE_CTABLE)
1113   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1114 #else
1115   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1116 #endif
1117   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1118   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1119   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1120   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1121   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1122   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1123   ierr = PetscFree(baij);CHKERRQ(ierr);
1124   PLogObjectDestroy(mat);
1125   PetscHeaderDestroy(mat);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNC__
1130 #define  __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1131 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1132 {
1133   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1134   int         ierr,nt;
1135 
1136   PetscFunctionBegin;
1137   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1138   if (nt != a->n) {
1139     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1140   }
1141   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1142   if (nt != a->m) {
1143     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1144   }
1145   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1146   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1147   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1148   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1149   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1150   PetscFunctionReturn(0);
1151 }
1152 
1153 #undef __FUNC__
1154 #define  __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1155 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1156 {
1157   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1158   int        ierr;
1159 
1160   PetscFunctionBegin;
1161   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1162   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1163   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1164   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNC__
1169 #define  __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
1170 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1171 {
1172   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1173   int         ierr;
1174 
1175   PetscFunctionBegin;
1176   /* do nondiagonal part */
1177   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1178   /* send it on its way */
1179   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1180   /* do local part */
1181   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1182   /* receive remote parts: note this assumes the values are not actually */
1183   /* inserted in yy until the next line, which is true for my implementation*/
1184   /* but is not perhaps always true. */
1185   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNC__
1190 #define  __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
1191 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1192 {
1193   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1194   int         ierr;
1195 
1196   PetscFunctionBegin;
1197   /* do nondiagonal part */
1198   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1199   /* send it on its way */
1200   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1201   /* do local part */
1202   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1203   /* receive remote parts: note this assumes the values are not actually */
1204   /* inserted in yy until the next line, which is true for my implementation*/
1205   /* but is not perhaps always true. */
1206   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 /*
1211   This only works correctly for square matrices where the subblock A->A is the
1212    diagonal block
1213 */
1214 #undef __FUNC__
1215 #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1216 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1217 {
1218   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1219   int         ierr;
1220 
1221   PetscFunctionBegin;
1222   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1223   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNC__
1228 #define  __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1229 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1230 {
1231   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1232   int         ierr;
1233 
1234   PetscFunctionBegin;
1235   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1236   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNC__
1241 #define  __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ"
1242 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1243 {
1244   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1245 
1246   PetscFunctionBegin;
1247   if (m) *m = mat->M;
1248   if (n) *n = mat->N;
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNC__
1253 #define  __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
1254 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1255 {
1256   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1257 
1258   PetscFunctionBegin;
1259   *m = mat->m; *n = mat->n;
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNC__
1264 #define  __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1265 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1266 {
1267   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1268 
1269   PetscFunctionBegin;
1270   if (m) *m = mat->rstart*mat->bs;
1271   if (n) *n = mat->rend*mat->bs;
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNC__
1276 #define  __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
1277 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1278 {
1279   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1280   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1281   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1282   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1283   int        *cmap,*idx_p,cstart = mat->cstart;
1284 
1285   PetscFunctionBegin;
1286   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1287   mat->getrowactive = PETSC_TRUE;
1288 
1289   if (!mat->rowvalues && (idx || v)) {
1290     /*
1291         allocate enough space to hold information from the longest row.
1292     */
1293     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1294     int     max = 1,mbs = mat->mbs,tmp;
1295     for (i=0; i<mbs; i++) {
1296       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1297       if (max < tmp) { max = tmp; }
1298     }
1299     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1300     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1301   }
1302 
1303   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1304   lrow = row - brstart;
1305 
1306   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1307   if (!v)   {pvA = 0; pvB = 0;}
1308   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1309   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1310   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1311   nztot = nzA + nzB;
1312 
1313   cmap  = mat->garray;
1314   if (v  || idx) {
1315     if (nztot) {
1316       /* Sort by increasing column numbers, assuming A and B already sorted */
1317       int imark = -1;
1318       if (v) {
1319         *v = v_p = mat->rowvalues;
1320         for (i=0; i<nzB; i++) {
1321           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1322           else break;
1323         }
1324         imark = i;
1325         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1326         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1327       }
1328       if (idx) {
1329         *idx = idx_p = mat->rowindices;
1330         if (imark > -1) {
1331           for (i=0; i<imark; i++) {
1332             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1333           }
1334         } else {
1335           for (i=0; i<nzB; i++) {
1336             if (cmap[cworkB[i]/bs] < cstart)
1337               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1338             else break;
1339           }
1340           imark = i;
1341         }
1342         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1343         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1344       }
1345     } else {
1346       if (idx) *idx = 0;
1347       if (v)   *v   = 0;
1348     }
1349   }
1350   *nz = nztot;
1351   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1352   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNC__
1357 #define  __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1358 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1359 {
1360   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1361 
1362   PetscFunctionBegin;
1363   if (baij->getrowactive == PETSC_FALSE) {
1364     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1365   }
1366   baij->getrowactive = PETSC_FALSE;
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNC__
1371 #define  __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1372 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1373 {
1374   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1375 
1376   PetscFunctionBegin;
1377   *bs = baij->bs;
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNC__
1382 #define  __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1383 int MatZeroEntries_MPIBAIJ(Mat A)
1384 {
1385   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1386   int         ierr;
1387 
1388   PetscFunctionBegin;
1389   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1390   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 #undef __FUNC__
1395 #define  __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1396 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1397 {
1398   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1399   Mat         A = a->A,B = a->B;
1400   int         ierr;
1401   PetscReal   isend[5],irecv[5];
1402 
1403   PetscFunctionBegin;
1404   info->block_size     = (double)a->bs;
1405   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1406   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1407   isend[3] = info->memory;  isend[4] = info->mallocs;
1408   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1409   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1410   isend[3] += info->memory;  isend[4] += info->mallocs;
1411   if (flag == MAT_LOCAL) {
1412     info->nz_used      = isend[0];
1413     info->nz_allocated = isend[1];
1414     info->nz_unneeded  = isend[2];
1415     info->memory       = isend[3];
1416     info->mallocs      = isend[4];
1417   } else if (flag == MAT_GLOBAL_MAX) {
1418     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1419     info->nz_used      = irecv[0];
1420     info->nz_allocated = irecv[1];
1421     info->nz_unneeded  = irecv[2];
1422     info->memory       = irecv[3];
1423     info->mallocs      = irecv[4];
1424   } else if (flag == MAT_GLOBAL_SUM) {
1425     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1426     info->nz_used      = irecv[0];
1427     info->nz_allocated = irecv[1];
1428     info->nz_unneeded  = irecv[2];
1429     info->memory       = irecv[3];
1430     info->mallocs      = irecv[4];
1431   } else {
1432     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1433   }
1434   info->rows_global       = (double)a->M;
1435   info->columns_global    = (double)a->N;
1436   info->rows_local        = (double)a->m;
1437   info->columns_local     = (double)a->N;
1438   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1439   info->fill_ratio_needed = 0;
1440   info->factor_mallocs    = 0;
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNC__
1445 #define  __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1446 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1447 {
1448   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1449   int         ierr;
1450 
1451   PetscFunctionBegin;
1452   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1453       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1454       op == MAT_COLUMNS_UNSORTED ||
1455       op == MAT_COLUMNS_SORTED ||
1456       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1457       op == MAT_KEEP_ZEROED_ROWS ||
1458       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1459         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1460         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1461   } else if (op == MAT_ROW_ORIENTED) {
1462         a->roworiented = PETSC_TRUE;
1463         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1464         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1465   } else if (op == MAT_ROWS_SORTED ||
1466              op == MAT_ROWS_UNSORTED ||
1467              op == MAT_SYMMETRIC ||
1468              op == MAT_STRUCTURALLY_SYMMETRIC ||
1469              op == MAT_YES_NEW_DIAGONALS ||
1470              op == MAT_USE_HASH_TABLE) {
1471     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1472   } else if (op == MAT_COLUMN_ORIENTED) {
1473     a->roworiented = PETSC_FALSE;
1474     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1475     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1476   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1477     a->donotstash = PETSC_TRUE;
1478   } else if (op == MAT_NO_NEW_DIAGONALS) {
1479     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1480   } else if (op == MAT_USE_HASH_TABLE) {
1481     a->ht_flag = PETSC_TRUE;
1482   } else {
1483     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNC__
1489 #define  __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1490 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1491 {
1492   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1493   Mat_SeqBAIJ *Aloc;
1494   Mat         B;
1495   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1496   int         bs=baij->bs,mbs=baij->mbs;
1497   MatScalar   *a;
1498 #if defined(PETSC_USE_MAT_SINGLE)
1499   Scalar      *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar));
1500   int         l;
1501 #endif
1502 
1503   PetscFunctionBegin;
1504   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1505   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1506 
1507   /* copy over the A part */
1508   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1509   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1510   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1511 
1512   for (i=0; i<mbs; i++) {
1513     rvals[0] = bs*(baij->rstart + i);
1514     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1515     for (j=ai[i]; j<ai[i+1]; j++) {
1516       col = (baij->cstart+aj[j])*bs;
1517       for (k=0; k<bs; k++) {
1518 #if defined(PETSC_USE_MAT_SINGLE)
1519         for (l=0; l<bs; l++) atmp[l] = a[l];
1520 #else
1521         atmp = a;
1522 #endif
1523         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,atmp,INSERT_VALUES);CHKERRQ(ierr);
1524         col++; a += bs;
1525       }
1526     }
1527   }
1528   /* copy over the B part */
1529   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1530   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1531   for (i=0; i<mbs; i++) {
1532     rvals[0] = bs*(baij->rstart + i);
1533     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1534     for (j=ai[i]; j<ai[i+1]; j++) {
1535       col = baij->garray[aj[j]]*bs;
1536       for (k=0; k<bs; k++) {
1537 #if defined(PETSC_USE_MAT_SINGLE)
1538         for (l=0; l<bs; l++) atmp[l] = a[l];
1539 #else
1540         atmp = a;
1541 #endif
1542         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,atmp,INSERT_VALUES);CHKERRQ(ierr);
1543         col++; a += bs;
1544       }
1545     }
1546   }
1547 #if defined(PETSC_USE_MAT_SINGLE)
1548   ierr = PetscFree(atmp);CHKERRQ(ierr);
1549 #endif
1550   ierr = PetscFree(rvals);CHKERRQ(ierr);
1551   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1552   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1553 
1554   if (matout) {
1555     *matout = B;
1556   } else {
1557     PetscOps *Abops;
1558     MatOps   Aops;
1559 
1560     /* This isn't really an in-place transpose .... but free data structures from baij */
1561     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1562     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1563     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1564 #if defined (PETSC_USE_CTABLE)
1565     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1566 #else
1567     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1568 #endif
1569     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1570     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1571     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1572     ierr = PetscFree(baij);CHKERRQ(ierr);
1573 
1574     /*
1575        This is horrible, horrible code. We need to keep the
1576       A pointers for the bops and ops but copy everything
1577       else from C.
1578     */
1579     Abops   = A->bops;
1580     Aops    = A->ops;
1581     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1582     A->bops = Abops;
1583     A->ops  = Aops;
1584 
1585     PetscHeaderDestroy(B);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNC__
1591 #define  __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
1592 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1593 {
1594   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1595   Mat         a = baij->A,b = baij->B;
1596   int         ierr,s1,s2,s3;
1597 
1598   PetscFunctionBegin;
1599   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1600   if (rr) {
1601     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1602     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1603     /* Overlap communication with computation. */
1604     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1605   }
1606   if (ll) {
1607     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1608     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1609     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1610   }
1611   /* scale  the diagonal block */
1612   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1613 
1614   if (rr) {
1615     /* Do a scatter end and then right scale the off-diagonal block */
1616     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1617     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1618   }
1619 
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNC__
1624 #define  __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
1625 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1626 {
1627   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1628   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1629   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1630   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1631   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1632   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1633   MPI_Comm       comm = A->comm;
1634   MPI_Request    *send_waits,*recv_waits;
1635   MPI_Status     recv_status,*send_status;
1636   IS             istmp;
1637 
1638   PetscFunctionBegin;
1639   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1640   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1641 
1642   /*  first count number of contributors to each processor */
1643   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1644   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1645   procs  = nprocs + size;
1646   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1647   for (i=0; i<N; i++) {
1648     idx   = rows[i];
1649     found = 0;
1650     for (j=0; j<size; j++) {
1651       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1652         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1653       }
1654     }
1655     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1656   }
1657   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1658 
1659   /* inform other processors of number of messages and max length*/
1660   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1661   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1662   nmax   = work[rank];
1663   nrecvs = work[size+rank];
1664   ierr = PetscFree(work);CHKERRQ(ierr);
1665 
1666   /* post receives:   */
1667   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1668   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1669   for (i=0; i<nrecvs; i++) {
1670     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1671   }
1672 
1673   /* do sends:
1674      1) starts[i] gives the starting index in svalues for stuff going to
1675      the ith processor
1676   */
1677   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1678   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1679   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1680   starts[0]  = 0;
1681   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1682   for (i=0; i<N; i++) {
1683     svalues[starts[owner[i]]++] = rows[i];
1684   }
1685   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1686 
1687   starts[0] = 0;
1688   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1689   count = 0;
1690   for (i=0; i<size; i++) {
1691     if (procs[i]) {
1692       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1693     }
1694   }
1695   ierr = PetscFree(starts);CHKERRQ(ierr);
1696 
1697   base = owners[rank]*bs;
1698 
1699   /*  wait on receives */
1700   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1701   source = lens + nrecvs;
1702   count  = nrecvs; slen = 0;
1703   while (count) {
1704     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1705     /* unpack receives into our local space */
1706     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1707     source[imdex]  = recv_status.MPI_SOURCE;
1708     lens[imdex]    = n;
1709     slen          += n;
1710     count--;
1711   }
1712   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1713 
1714   /* move the data into the send scatter */
1715   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1716   count = 0;
1717   for (i=0; i<nrecvs; i++) {
1718     values = rvalues + i*nmax;
1719     for (j=0; j<lens[i]; j++) {
1720       lrows[count++] = values[j] - base;
1721     }
1722   }
1723   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1724   ierr = PetscFree(lens);CHKERRQ(ierr);
1725   ierr = PetscFree(owner);CHKERRQ(ierr);
1726   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1727 
1728   /* actually zap the local rows */
1729   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1730   PLogObjectParent(A,istmp);
1731 
1732   /*
1733         Zero the required rows. If the "diagonal block" of the matrix
1734      is square and the user wishes to set the diagonal we use seperate
1735      code so that MatSetValues() is not called for each diagonal allocating
1736      new memory, thus calling lots of mallocs and slowing things down.
1737 
1738        Contributed by: Mathew Knepley
1739   */
1740   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1741   ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr);
1742   if (diag && (l->A->M == l->A->N)) {
1743     ierr      = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1744   } else if (diag) {
1745     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1746     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1747       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1748 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1749     }
1750     for (i = 0; i < slen; i++) {
1751       row  = lrows[i] + rstart_bs;
1752       ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1753     }
1754     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1755     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1756   } else {
1757     ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
1758   }
1759 
1760   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1761   ierr = PetscFree(lrows);CHKERRQ(ierr);
1762 
1763   /* wait on sends */
1764   if (nsends) {
1765     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1766     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1767     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1768   }
1769   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1770   ierr = PetscFree(svalues);CHKERRQ(ierr);
1771 
1772   PetscFunctionReturn(0);
1773 }
1774 
1775 #undef __FUNC__
1776 #define  __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1777 int MatPrintHelp_MPIBAIJ(Mat A)
1778 {
1779   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1780   MPI_Comm    comm = A->comm;
1781   static int  called = 0;
1782   int         ierr;
1783 
1784   PetscFunctionBegin;
1785   if (!a->rank) {
1786     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1787   }
1788   if (called) {PetscFunctionReturn(0);} else called = 1;
1789   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1790   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNC__
1795 #define  __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1796 int MatSetUnfactored_MPIBAIJ(Mat A)
1797 {
1798   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1799   int         ierr;
1800 
1801   PetscFunctionBegin;
1802   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1803   PetscFunctionReturn(0);
1804 }
1805 
1806 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1807 
1808 #undef __FUNC__
1809 #define  __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
1810 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1811 {
1812   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1813   Mat         a,b,c,d;
1814   PetscTruth  flg;
1815   int         ierr;
1816 
1817   PetscFunctionBegin;
1818   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1819   a = matA->A; b = matA->B;
1820   c = matB->A; d = matB->B;
1821 
1822   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1823   if (flg == PETSC_TRUE) {
1824     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1825   }
1826   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 /* -------------------------------------------------------------------*/
1831 static struct _MatOps MatOps_Values = {
1832   MatSetValues_MPIBAIJ,
1833   MatGetRow_MPIBAIJ,
1834   MatRestoreRow_MPIBAIJ,
1835   MatMult_MPIBAIJ,
1836   MatMultAdd_MPIBAIJ,
1837   MatMultTranspose_MPIBAIJ,
1838   MatMultTransposeAdd_MPIBAIJ,
1839   0,
1840   0,
1841   0,
1842   0,
1843   0,
1844   0,
1845   0,
1846   MatTranspose_MPIBAIJ,
1847   MatGetInfo_MPIBAIJ,
1848   MatEqual_MPIBAIJ,
1849   MatGetDiagonal_MPIBAIJ,
1850   MatDiagonalScale_MPIBAIJ,
1851   MatNorm_MPIBAIJ,
1852   MatAssemblyBegin_MPIBAIJ,
1853   MatAssemblyEnd_MPIBAIJ,
1854   0,
1855   MatSetOption_MPIBAIJ,
1856   MatZeroEntries_MPIBAIJ,
1857   MatZeroRows_MPIBAIJ,
1858   0,
1859   0,
1860   0,
1861   0,
1862   MatGetSize_MPIBAIJ,
1863   MatGetLocalSize_MPIBAIJ,
1864   MatGetOwnershipRange_MPIBAIJ,
1865   0,
1866   0,
1867   0,
1868   0,
1869   MatDuplicate_MPIBAIJ,
1870   0,
1871   0,
1872   0,
1873   0,
1874   0,
1875   MatGetSubMatrices_MPIBAIJ,
1876   MatIncreaseOverlap_MPIBAIJ,
1877   MatGetValues_MPIBAIJ,
1878   0,
1879   MatPrintHelp_MPIBAIJ,
1880   MatScale_MPIBAIJ,
1881   0,
1882   0,
1883   0,
1884   MatGetBlockSize_MPIBAIJ,
1885   0,
1886   0,
1887   0,
1888   0,
1889   0,
1890   0,
1891   MatSetUnfactored_MPIBAIJ,
1892   0,
1893   MatSetValuesBlocked_MPIBAIJ,
1894   0,
1895   0,
1896   0,
1897   MatGetMaps_Petsc};
1898 
1899 
1900 EXTERN_C_BEGIN
1901 #undef __FUNC__
1902 #define  __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
1903 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1904 {
1905   PetscFunctionBegin;
1906   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1907   *iscopy = PETSC_FALSE;
1908   PetscFunctionReturn(0);
1909 }
1910 EXTERN_C_END
1911 
1912 #undef __FUNC__
1913 #define  __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
1914 /*@C
1915    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1916    (block compressed row).  For good matrix assembly performance
1917    the user should preallocate the matrix storage by setting the parameters
1918    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1919    performance can be increased by more than a factor of 50.
1920 
1921    Collective on MPI_Comm
1922 
1923    Input Parameters:
1924 +  comm - MPI communicator
1925 .  bs   - size of blockk
1926 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1927            This value should be the same as the local size used in creating the
1928            y vector for the matrix-vector product y = Ax.
1929 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1930            This value should be the same as the local size used in creating the
1931            x vector for the matrix-vector product y = Ax.
1932 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1933 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1934 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1935            submatrix  (same for all local rows)
1936 .  d_nnz - array containing the number of block nonzeros in the various block rows
1937            of the in diagonal portion of the local (possibly different for each block
1938            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1939 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1940            submatrix (same for all local rows).
1941 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1942            off-diagonal portion of the local submatrix (possibly different for
1943            each block row) or PETSC_NULL.
1944 
1945    Output Parameter:
1946 .  A - the matrix
1947 
1948    Options Database Keys:
1949 .   -mat_no_unroll - uses code that does not unroll the loops in the
1950                      block calculations (much slower)
1951 .   -mat_block_size - size of the blocks to use
1952 .   -mat_mpi - use the parallel matrix data structures even on one processor
1953                (defaults to using SeqBAIJ format on one processor)
1954 
1955    Notes:
1956    The user MUST specify either the local or global matrix dimensions
1957    (possibly both).
1958 
1959    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1960    than it must be used on all processors that share the object for that argument.
1961 
1962    Storage Information:
1963    For a square global matrix we define each processor's diagonal portion
1964    to be its local rows and the corresponding columns (a square submatrix);
1965    each processor's off-diagonal portion encompasses the remainder of the
1966    local matrix (a rectangular submatrix).
1967 
1968    The user can specify preallocated storage for the diagonal part of
1969    the local submatrix with either d_nz or d_nnz (not both).  Set
1970    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1971    memory allocation.  Likewise, specify preallocated storage for the
1972    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1973 
1974    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1975    the figure below we depict these three local rows and all columns (0-11).
1976 
1977 .vb
1978            0 1 2 3 4 5 6 7 8 9 10 11
1979           -------------------
1980    row 3  |  o o o d d d o o o o o o
1981    row 4  |  o o o d d d o o o o o o
1982    row 5  |  o o o d d d o o o o o o
1983           -------------------
1984 .ve
1985 
1986    Thus, any entries in the d locations are stored in the d (diagonal)
1987    submatrix, and any entries in the o locations are stored in the
1988    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1989    stored simply in the MATSEQBAIJ format for compressed row storage.
1990 
1991    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1992    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1993    In general, for PDE problems in which most nonzeros are near the diagonal,
1994    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1995    or you will get TERRIBLE performance; see the users' manual chapter on
1996    matrices.
1997 
1998    Level: intermediate
1999 
2000 .keywords: matrix, block, aij, compressed row, sparse, parallel
2001 
2002 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2003 @*/
2004 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2005 {
2006   Mat          B;
2007   Mat_MPIBAIJ  *b;
2008   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2009   PetscTruth   flag1,flag2,flg;
2010 
2011   PetscFunctionBegin;
2012   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2013 
2014   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2015   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
2016   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
2017   if (d_nnz) {
2018     for (i=0; i<m/bs; i++) {
2019       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2020     }
2021   }
2022   if (o_nnz) {
2023     for (i=0; i<m/bs; i++) {
2024       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2025     }
2026   }
2027 
2028   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2029   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2030   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2031   if (!flag1 && !flag2 && size == 1) {
2032     if (M == PETSC_DECIDE) M = m;
2033     if (N == PETSC_DECIDE) N = n;
2034     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
2035     PetscFunctionReturn(0);
2036   }
2037 
2038   *A = 0;
2039   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
2040   PLogObjectCreate(B);
2041   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2042   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2043   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2044 
2045   B->ops->destroy    = MatDestroy_MPIBAIJ;
2046   B->ops->view       = MatView_MPIBAIJ;
2047   B->mapping    = 0;
2048   B->factor     = 0;
2049   B->assembled  = PETSC_FALSE;
2050 
2051   B->insertmode = NOT_SET_VALUES;
2052   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2053   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2054 
2055   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2056     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2057   }
2058   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2059     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2060   }
2061   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
2062     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2063   }
2064   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2065   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2066 
2067   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2068     work[0] = m; work[1] = n;
2069     mbs = m/bs; nbs = n/bs;
2070     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2071     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2072     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2073   }
2074   if (m == PETSC_DECIDE) {
2075     Mbs = M/bs;
2076     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2077     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2078     m   = mbs*bs;
2079   }
2080   if (n == PETSC_DECIDE) {
2081     Nbs = N/bs;
2082     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
2083     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2084     n   = nbs*bs;
2085   }
2086   if (mbs*bs != m || nbs*bs != n) {
2087     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2088   }
2089 
2090   b->m = m; B->m = m;
2091   b->n = n; B->n = n;
2092   b->N = N; B->N = N;
2093   b->M = M; B->M = M;
2094   b->bs  = bs;
2095   b->bs2 = bs*bs;
2096   b->mbs = mbs;
2097   b->nbs = nbs;
2098   b->Mbs = Mbs;
2099   b->Nbs = Nbs;
2100 
2101   /* the information in the maps duplicates the information computed below, eventually
2102      we should remove the duplicate information that is not contained in the maps */
2103   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2104   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2105 
2106   /* build local table of row and column ownerships */
2107   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2108   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2109   b->cowners    = b->rowners + b->size + 2;
2110   b->rowners_bs = b->cowners + b->size + 2;
2111   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2112   b->rowners[0]    = 0;
2113   for (i=2; i<=b->size; i++) {
2114     b->rowners[i] += b->rowners[i-1];
2115   }
2116   for (i=0; i<=b->size; i++) {
2117     b->rowners_bs[i] = b->rowners[i]*bs;
2118   }
2119   b->rstart    = b->rowners[b->rank];
2120   b->rend      = b->rowners[b->rank+1];
2121   b->rstart_bs = b->rstart * bs;
2122   b->rend_bs   = b->rend * bs;
2123 
2124   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2125   b->cowners[0] = 0;
2126   for (i=2; i<=b->size; i++) {
2127     b->cowners[i] += b->cowners[i-1];
2128   }
2129   b->cstart    = b->cowners[b->rank];
2130   b->cend      = b->cowners[b->rank+1];
2131   b->cstart_bs = b->cstart * bs;
2132   b->cend_bs   = b->cend * bs;
2133 
2134 
2135   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2136   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2137   PLogObjectParent(B,b->A);
2138   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2139   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2140   PLogObjectParent(B,b->B);
2141 
2142   /* build cache for off array entries formed */
2143   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
2144   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
2145   b->donotstash  = PETSC_FALSE;
2146   b->colmap      = PETSC_NULL;
2147   b->garray      = PETSC_NULL;
2148   b->roworiented = PETSC_TRUE;
2149 
2150   /* stuff used in block assembly */
2151   b->barray       = 0;
2152 
2153   /* stuff used for matrix vector multiply */
2154   b->lvec         = 0;
2155   b->Mvctx        = 0;
2156 
2157   /* stuff for MatGetRow() */
2158   b->rowindices   = 0;
2159   b->rowvalues    = 0;
2160   b->getrowactive = PETSC_FALSE;
2161 
2162   /* hash table stuff */
2163   b->ht           = 0;
2164   b->hd           = 0;
2165   b->ht_size      = 0;
2166   b->ht_flag      = PETSC_FALSE;
2167   b->ht_fact      = 0;
2168   b->ht_total_ct  = 0;
2169   b->ht_insert_ct = 0;
2170 
2171   *A = B;
2172   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2173   if (flg) {
2174     double fact = 1.39;
2175     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2176     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2177     if (fact <= 1.0) fact = 1.39;
2178     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2179     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2180   }
2181   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2182                                      "MatStoreValues_MPIBAIJ",
2183                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2184   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2185                                      "MatRetrieveValues_MPIBAIJ",
2186                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2187   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2188                                      "MatGetDiagonalBlock_MPIBAIJ",
2189                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNC__
2194 #define  __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
2195 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2196 {
2197   Mat         mat;
2198   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2199   int         ierr,len=0;
2200   PetscTruth  flg;
2201 
2202   PetscFunctionBegin;
2203   *newmat       = 0;
2204   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2205   PLogObjectCreate(mat);
2206   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2207   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2208   mat->ops->destroy = MatDestroy_MPIBAIJ;
2209   mat->ops->view    = MatView_MPIBAIJ;
2210   mat->factor       = matin->factor;
2211   mat->assembled    = PETSC_TRUE;
2212   mat->insertmode   = NOT_SET_VALUES;
2213 
2214   a->m = mat->m   = oldmat->m;
2215   a->n = mat->n   = oldmat->n;
2216   a->M = mat->M   = oldmat->M;
2217   a->N = mat->N   = oldmat->N;
2218 
2219   a->bs  = oldmat->bs;
2220   a->bs2 = oldmat->bs2;
2221   a->mbs = oldmat->mbs;
2222   a->nbs = oldmat->nbs;
2223   a->Mbs = oldmat->Mbs;
2224   a->Nbs = oldmat->Nbs;
2225 
2226   a->rstart       = oldmat->rstart;
2227   a->rend         = oldmat->rend;
2228   a->cstart       = oldmat->cstart;
2229   a->cend         = oldmat->cend;
2230   a->size         = oldmat->size;
2231   a->rank         = oldmat->rank;
2232   a->donotstash   = oldmat->donotstash;
2233   a->roworiented  = oldmat->roworiented;
2234   a->rowindices   = 0;
2235   a->rowvalues    = 0;
2236   a->getrowactive = PETSC_FALSE;
2237   a->barray       = 0;
2238   a->rstart_bs    = oldmat->rstart_bs;
2239   a->rend_bs      = oldmat->rend_bs;
2240   a->cstart_bs    = oldmat->cstart_bs;
2241   a->cend_bs      = oldmat->cend_bs;
2242 
2243   /* hash table stuff */
2244   a->ht           = 0;
2245   a->hd           = 0;
2246   a->ht_size      = 0;
2247   a->ht_flag      = oldmat->ht_flag;
2248   a->ht_fact      = oldmat->ht_fact;
2249   a->ht_total_ct  = 0;
2250   a->ht_insert_ct = 0;
2251 
2252 
2253   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2254   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2255   a->cowners    = a->rowners + a->size + 2;
2256   a->rowners_bs = a->cowners + a->size + 2;
2257   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2258   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2259   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2260   if (oldmat->colmap) {
2261 #if defined (PETSC_USE_CTABLE)
2262   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2263 #else
2264     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2265     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2266     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2267 #endif
2268   } else a->colmap = 0;
2269   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2270     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2271     PLogObjectMemory(mat,len*sizeof(int));
2272     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2273   } else a->garray = 0;
2274 
2275   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2276   PLogObjectParent(mat,a->lvec);
2277   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2278 
2279   PLogObjectParent(mat,a->Mvctx);
2280   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2281   PLogObjectParent(mat,a->A);
2282   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2283   PLogObjectParent(mat,a->B);
2284   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2285   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2286   if (flg) {
2287     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2288   }
2289   *newmat = mat;
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #include "sys.h"
2294 
2295 #undef __FUNC__
2296 #define  __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
2297 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2298 {
2299   Mat          A;
2300   int          i,nz,ierr,j,rstart,rend,fd;
2301   Scalar       *vals,*buf;
2302   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2303   MPI_Status   status;
2304   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2305   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2306   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2307   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2308   int          dcount,kmax,k,nzcount,tmp;
2309 
2310   PetscFunctionBegin;
2311   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2312 
2313   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2314   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2315   if (!rank) {
2316     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2317     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2318     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2319     if (header[3] < 0) {
2320       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2321     }
2322   }
2323 
2324   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2325   M = header[1]; N = header[2];
2326 
2327   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2328 
2329   /*
2330      This code adds extra rows to make sure the number of rows is
2331      divisible by the blocksize
2332   */
2333   Mbs        = M/bs;
2334   extra_rows = bs - M + bs*(Mbs);
2335   if (extra_rows == bs) extra_rows = 0;
2336   else                  Mbs++;
2337   if (extra_rows &&!rank) {
2338     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2339   }
2340 
2341   /* determine ownership of all rows */
2342   mbs = Mbs/size + ((Mbs % size) > rank);
2343   m   = mbs * bs;
2344   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2345   browners = rowners + size + 1;
2346   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2347   rowners[0] = 0;
2348   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2349   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2350   rstart = rowners[rank];
2351   rend   = rowners[rank+1];
2352 
2353   /* distribute row lengths to all processors */
2354   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2355   if (!rank) {
2356     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2357     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2358     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2359     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2360     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2361     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2362     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2363   } else {
2364     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2365   }
2366 
2367   if (!rank) {
2368     /* calculate the number of nonzeros on each processor */
2369     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2370     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2371     for (i=0; i<size; i++) {
2372       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2373         procsnz[i] += rowlengths[j];
2374       }
2375     }
2376     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2377 
2378     /* determine max buffer needed and allocate it */
2379     maxnz = 0;
2380     for (i=0; i<size; i++) {
2381       maxnz = PetscMax(maxnz,procsnz[i]);
2382     }
2383     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2384 
2385     /* read in my part of the matrix column indices  */
2386     nz = procsnz[0];
2387     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2388     mycols = ibuf;
2389     if (size == 1)  nz -= extra_rows;
2390     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2391     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2392 
2393     /* read in every ones (except the last) and ship off */
2394     for (i=1; i<size-1; i++) {
2395       nz   = procsnz[i];
2396       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2397       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2398     }
2399     /* read in the stuff for the last proc */
2400     if (size != 1) {
2401       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2402       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2403       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2404       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2405     }
2406     ierr = PetscFree(cols);CHKERRQ(ierr);
2407   } else {
2408     /* determine buffer space needed for message */
2409     nz = 0;
2410     for (i=0; i<m; i++) {
2411       nz += locrowlens[i];
2412     }
2413     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2414     mycols = ibuf;
2415     /* receive message of column indices*/
2416     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2417     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2418     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2419   }
2420 
2421   /* loop over local rows, determining number of off diagonal entries */
2422   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2423   odlens = dlens + (rend-rstart);
2424   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2425   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2426   masked1 = mask    + Mbs;
2427   masked2 = masked1 + Mbs;
2428   rowcount = 0; nzcount = 0;
2429   for (i=0; i<mbs; i++) {
2430     dcount  = 0;
2431     odcount = 0;
2432     for (j=0; j<bs; j++) {
2433       kmax = locrowlens[rowcount];
2434       for (k=0; k<kmax; k++) {
2435         tmp = mycols[nzcount++]/bs;
2436         if (!mask[tmp]) {
2437           mask[tmp] = 1;
2438           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2439           else masked1[dcount++] = tmp;
2440         }
2441       }
2442       rowcount++;
2443     }
2444 
2445     dlens[i]  = dcount;
2446     odlens[i] = odcount;
2447 
2448     /* zero out the mask elements we set */
2449     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2450     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2451   }
2452 
2453   /* create our matrix */
2454   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2455   A = *newmat;
2456   MatSetOption(A,MAT_COLUMNS_SORTED);
2457 
2458   if (!rank) {
2459     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2460     /* read in my part of the matrix numerical values  */
2461     nz = procsnz[0];
2462     vals = buf;
2463     mycols = ibuf;
2464     if (size == 1)  nz -= extra_rows;
2465     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2466     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2467 
2468     /* insert into matrix */
2469     jj      = rstart*bs;
2470     for (i=0; i<m; i++) {
2471       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2472       mycols += locrowlens[i];
2473       vals   += locrowlens[i];
2474       jj++;
2475     }
2476     /* read in other processors (except the last one) and ship out */
2477     for (i=1; i<size-1; i++) {
2478       nz   = procsnz[i];
2479       vals = buf;
2480       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2481       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2482     }
2483     /* the last proc */
2484     if (size != 1){
2485       nz   = procsnz[i] - extra_rows;
2486       vals = buf;
2487       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2488       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2489       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2490     }
2491     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2492   } else {
2493     /* receive numeric values */
2494     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2495 
2496     /* receive message of values*/
2497     vals   = buf;
2498     mycols = ibuf;
2499     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2500     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2501     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2502 
2503     /* insert into matrix */
2504     jj      = rstart*bs;
2505     for (i=0; i<m; i++) {
2506       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2507       mycols += locrowlens[i];
2508       vals   += locrowlens[i];
2509       jj++;
2510     }
2511   }
2512   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2513   ierr = PetscFree(buf);CHKERRQ(ierr);
2514   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2515   ierr = PetscFree(rowners);CHKERRQ(ierr);
2516   ierr = PetscFree(dlens);CHKERRQ(ierr);
2517   ierr = PetscFree(mask);CHKERRQ(ierr);
2518   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2519   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 
2523 #undef __FUNC__
2524 #define  __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2525 /*@
2526    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2527 
2528    Input Parameters:
2529 .  mat  - the matrix
2530 .  fact - factor
2531 
2532    Collective on Mat
2533 
2534    Level: advanced
2535 
2536   Notes:
2537    This can also be set by the command line option: -mat_use_hash_table fact
2538 
2539 .keywords: matrix, hashtable, factor, HT
2540 
2541 .seealso: MatSetOption()
2542 @*/
2543 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2544 {
2545   Mat_MPIBAIJ *baij;
2546 
2547   PetscFunctionBegin;
2548   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2549   if (mat->type != MATMPIBAIJ) {
2550     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2551   }
2552   baij = (Mat_MPIBAIJ*)mat->data;
2553   baij->ht_fact = fact;
2554   PetscFunctionReturn(0);
2555 }
2556