xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 302e33e32b979537e1bd20d2c4ea1f2185eb53ee)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.168 1999/05/04 20:32:41 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
48 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;  /* off-diagonal local part */
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (USE_CTABLE)
  /* Hash-table version: memory scales with the number of local off-diagonal
     block columns instead of the global size.
     NOTE(review): the size hint uses baij->nbs while the loop runs over
     B->nbs -- confirm these refer to the same count. */
  ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    /* keys/values are shifted by one so that 0 can mean "not present" */
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Dense version: one slot per global block column (non-scalable) */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  /* store local point index + 1; a value of 0 marks an absent column */
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
77 
/* Number of extra block entries allocated each time a row overflows its space */
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) -

   Inserts (or adds, when addv == ADD_VALUES) one scalar entry at POINT
   indices (row,col) of the local "diagonal" matrix A.  This is an inlined
   fast path used by MatSetValues_MPIBAIJ(); it requires the caller to have
   the following variables in scope: a, A, aa, ai, aj, aimax, ailen, bs,
   bs2, rp, ap, bap, rmax, nrow, low, high, t, ridx, cidx, brow, bcol,
   _i, ii, N, ierr.  If the target block does not yet exist and a->nonew
   permits, the row is grown and the block inserted in sorted column order.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* binary-search down to a window of <= 3 block columns, then scan */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block already present: update entry (ridx,cidx) in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      /* new nonzero block: honor the nonew flags */ \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: values, column indices and row offsets \
           live in ONE allocation (hence singlemalloc below) */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, leaving a CHUNKSIZE gap after brow */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                          len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row to open slot _i */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
154 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) -

   Mirror of MatSetValues_SeqBAIJ_A_Private() for the local "off-diagonal"
   matrix B (variables b, B, ba, bi, bj, bimax, bilen instead of the a/A
   set).  Same caller-scope variable requirements and same row-growth
   behavior; see the A variant above for a line-by-line explanation.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      /* binary-search down to a window of <= 3 block columns, then scan */ \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block already present: update entry (ridx,cidx) in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      /* new nonzero block: honor the nonew flags */ \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space: values, column indices and row offsets \
           live in ONE allocation (hence singlemalloc below) */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots, leaving a CHUNKSIZE gap after brow */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row to open slot _i */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
229 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   Inserts/adds values given by POINT (not block) indices into an MPIBAIJ
   matrix.  Locally owned entries go straight into the local A (diagonal)
   or B (off-diagonal) part via the inlined macros above; entries destined
   for other processes are stashed for communication at assembly time.
   Negative row/column indices are silently ignored.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;   /* negative row indices are ignored by convention */
#if defined(USE_PETSC_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned by this process */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* off-diagonal block B: translate global column via colmap
             (values are stored +1 so 0 means "absent") */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* previously unseen column: revert B to global numbering and
                 refresh the cached pointers the macro below relies on */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned elsewhere: stash for the AssemblyBegin/End communication */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
312 
313 #undef __FUNC__
314 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
315 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
316 {
317   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
318   Scalar      *value,*barray=baij->barray;
319   int         ierr,i,j,ii,jj,row,col;
320   int         roworiented = baij->roworiented,rstart=baij->rstart ;
321   int         rend=baij->rend,cstart=baij->cstart,stepval;
322   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
323 
324   if(!barray) {
325     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
326   }
327 
328   if (roworiented) {
329     stepval = (n-1)*bs;
330   } else {
331     stepval = (m-1)*bs;
332   }
333   for ( i=0; i<m; i++ ) {
334     if (im[i] < 0) continue;
335 #if defined(USE_PETSC_BOPT_g)
336     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
337 #endif
338     if (im[i] >= rstart && im[i] < rend) {
339       row = im[i] - rstart;
340       for ( j=0; j<n; j++ ) {
341         /* If NumCol = 1 then a copy is not required */
342         if ((roworiented) && (n == 1)) {
343           barray = v + i*bs2;
344         } else if((!roworiented) && (m == 1)) {
345           barray = v + j*bs2;
346         } else { /* Here a copy is required */
347           if (roworiented) {
348             value = v + i*(stepval+bs)*bs + j*bs;
349           } else {
350             value = v + j*(stepval+bs)*bs + i*bs;
351           }
352           for ( ii=0; ii<bs; ii++,value+=stepval ) {
353             for (jj=0; jj<bs; jj++ ) {
354               *barray++  = *value++;
355             }
356           }
357           barray -=bs2;
358         }
359 
360         if (in[j] >= cstart && in[j] < cend){
361           col  = in[j] - cstart;
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364         else if (in[j] < 0) continue;
365 #if defined(USE_PETSC_BOPT_g)
366         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
367 #endif
368         else {
369           if (mat->was_assembled) {
370             if (!baij->colmap) {
371               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
372             }
373 
374 #if defined(USE_PETSC_BOPT_g)
375 #if defined (USE_CTABLE)
376             { int data;
377             ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
378             if((data - 1) % bs)
379 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
380             }
381 #else
382             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
383 #endif
384 #endif
385 #if defined (USE_CTABLE)
386 	    ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
387             col  = (col - 1)/bs;
388 #else
389             col = (baij->colmap[in[j]] - 1)/bs;
390 #endif
391             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394             }
395           }
396           else col = in[j];
397           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
398         }
399       }
400     } else {
401       if (!baij->donotstash) {
402         if (roworiented) {
403           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
404         } else {
405           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
406         }
407       }
408     }
409   }
410   PetscFunctionReturn(0);
411 }
/* Multiplicative hashing constant (fractional part of the golden ratio) */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* Maps key into [0,size); tmp is caller-supplied double scratch storage */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValues_MPIBAIJ (point indices): instead of
   searching the row structure, the (block-row,block-col) -> storage-address
   table built by MatCreateHashTable_MPIBAIJ_Private() is probed linearly
   (with wrap-around).  Only entries whose block already exists in the
   table can be set; a missing block is an error.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics */
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the global block
           coordinates, shifted by one so that 0 means "empty slot" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build: identical probe loop but also counts comparisons */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe from h1 to end of table, then wrap to the start */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned elsewhere: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
492 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Hash-table variant of MatSetValuesBlocked_MPIBAIJ (block indices):
   each bs-by-bs block of v is copied/added directly into the block's
   storage address found in the hash table.  Only existing blocks can
   be set; a missing block is an error.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics */
#endif

  PetscFunctionBegin;

  /* distance between consecutive block rows (resp. columns) of v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table: key encodes the global block
           coordinates, shifted by one so that 0 means "empty slot" */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build: identical probe loop but also counts comparisons */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe from h1 to end of table, then wrap to the start */
        if (HT[idx] != key) {
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];   /* start of this block's stored values */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* transpose-copy: stored blocks are column-major (jj strides by bs) */
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          /* input already column-major: straight copy column by column */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned elsewhere: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
606 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   Retrieves values at the given global POINT indices.  Only locally owned
   rows may be queried (off-process rows raise an error); entries that lie
   outside the locally stored nonzero blocks are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the diagonal part A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal part B: map global block column via colmap
             (entries are stored shifted by one; 0 means "absent") */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* data < 0 or a garray mismatch means the block is not stored here */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
650 
651 #undef __FUNC__
652 #define __FUNC__ "MatNorm_MPIBAIJ"
653 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
654 {
655   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
656   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
657   int        ierr, i,bs2=baij->bs2;
658   double     sum = 0.0;
659   Scalar     *v;
660 
661   PetscFunctionBegin;
662   if (baij->size == 1) {
663     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
664   } else {
665     if (type == NORM_FROBENIUS) {
666       v = amat->a;
667       for (i=0; i<amat->nz*bs2; i++ ) {
668 #if defined(USE_PETSC_COMPLEX)
669         sum += PetscReal(PetscConj(*v)*(*v)); v++;
670 #else
671         sum += (*v)*(*v); v++;
672 #endif
673       }
674       v = bmat->a;
675       for (i=0; i<bmat->nz*bs2; i++ ) {
676 #if defined(USE_PETSC_COMPLEX)
677         sum += PetscReal(PetscConj(*v)*(*v)); v++;
678 #else
679         sum += (*v)*(*v); v++;
680 #endif
681       }
682       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
683       *norm = sqrt(*norm);
684     } else {
685       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
686     }
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 
/*
  Creates the hash table, and sets the table entries.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
699 #undef __FUNC__
700 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
701 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
702 {
703   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
704   Mat         A = baij->A, B=baij->B;
705   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
706   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
707   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
708   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
709   int         *HT,key;
710   Scalar      **HD;
711   double      tmp;
712 #if defined(USE_PETSC_BOPT_g)
713   int         ct=0,max=0;
714 #endif
715 
716   PetscFunctionBegin;
717   baij->ht_size=(int)(factor*nz);
718   size = baij->ht_size;
719 
720   if (baij->ht) {
721     PetscFunctionReturn(0);
722   }
723 
724   /* Allocate Memory for Hash Table */
725   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
726   baij->ht = (int*)(baij->hd + size);
727   HD = baij->hd;
728   HT = baij->ht;
729 
730 
731   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
732 
733 
734   /* Loop Over A */
735   for ( i=0; i<a->mbs; i++ ) {
736     for ( j=ai[i]; j<ai[i+1]; j++ ) {
737       row = i+rstart;
738       col = aj[j]+cstart;
739 
740       key = row*Nbs + col + 1;
741       h1  = HASH(size,key,tmp);
742       for ( k=0; k<size; k++ ){
743         if (HT[(h1+k)%size] == 0.0) {
744           HT[(h1+k)%size] = key;
745           HD[(h1+k)%size] = a->a + j*bs2;
746           break;
747 #if defined(USE_PETSC_BOPT_g)
748         } else {
749           ct++;
750 #endif
751         }
752       }
753 #if defined(USE_PETSC_BOPT_g)
754       if (k> max) max = k;
755 #endif
756     }
757   }
758   /* Loop Over B */
759   for ( i=0; i<b->mbs; i++ ) {
760     for ( j=bi[i]; j<bi[i+1]; j++ ) {
761       row = i+rstart;
762       col = garray[bj[j]];
763       key = row*Nbs + col + 1;
764       h1  = HASH(size,key,tmp);
765       for ( k=0; k<size; k++ ){
766         if (HT[(h1+k)%size] == 0.0) {
767           HT[(h1+k)%size] = key;
768           HD[(h1+k)%size] = b->a + j*bs2;
769           break;
770 #if defined(USE_PETSC_BOPT_g)
771         } else {
772           ct++;
773 #endif
774         }
775       }
776 #if defined(USE_PETSC_BOPT_g)
777       if (k> max) max = k;
778 #endif
779     }
780   }
781 
782   /* Print Summary */
783 #if defined(USE_PETSC_BOPT_g)
784   for ( i=0,j=0; i<size; i++)
785     if (HT[i]) {j++;}
786   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
787            (j== 0)? 0.0:((double)(ct+j))/j,max);
788 #endif
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNC__
793 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
794 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
795 {
796   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
797   int         ierr,nstash,reallocs;
798   InsertMode  addv;
799 
800   PetscFunctionBegin;
801   if (baij->donotstash) {
802     PetscFunctionReturn(0);
803   }
804 
805   /* make sure all processors are either in INSERTMODE or ADDMODE */
806   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) {
808     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
809   }
810   mat->insertmode = addv; /* in case this processor had no cache */
811 
812   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
813   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
815   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
816   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
817   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
818   PetscFunctionReturn(0);
819 }
820 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes assembly of the parallel matrix:
     - drains the scalar stash and the block stash (values destined for
       this processor that were set on another), inserting each run of
       same-row values with a single MatSetValues call
     - assembles the local diagonal block A and off-diagonal block B
     - coordinates disassembly across processors so all can reassemble
     - sets up the multiply scatter on the first final assembly
     - optionally builds the hash table and switches to the hash-table
       based MatSetValues routines
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* receive every message of stashed point values and insert them */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block occupies bs2 scalars, hence the i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    /* restore the saved orientation flags */
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: zero iff at least one processor had
       not been assembled (i.e. someone disassembled) */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the scatter used by MatMult etc. */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    /* report and reset hash-table search statistics (debug builds only) */
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on the first final assembly (if requested via ht_flag) build the hash
     table and switch to the hash-table based insertion routines */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* free the work space used by MatGetRow() */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
916 
917 #undef __FUNC__
918 #define __FUNC__ "MatView_MPIBAIJ_Binary"
919 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
920 {
921   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
922   int          ierr;
923 
924   PetscFunctionBegin;
925   if (baij->size == 1) {
926     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
927   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
928   PetscFunctionReturn(0);
929 }
930 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Views the matrix through an ASCII, draw, or socket viewer.  Special
   ASCII info formats print per-processor statistics; otherwise, for more
   than one processor, the entire matrix is gathered onto processor 0 as
   a temporary MPIBAIJ matrix and viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* one processor at a time prints its local nonzero statistics */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    /* nothing to do if the draw context is a null device */
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* processor 0 owns all M x N rows of the gathered copy; the others
       contribute zero local rows */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global row numbers of block row i */
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        /* cstart offset converts local block column to global */
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          /* insert one column of the bs x bs block; a advances through
             the column-oriented block storage */
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        /* garray maps local off-diagonal block columns to global */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    PetscFree(rvals);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1035 
1036 
1037 
1038 #undef __FUNC__
1039 #define __FUNC__ "MatView_MPIBAIJ"
1040 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1041 {
1042   int         ierr;
1043   ViewerType  vtype;
1044 
1045   PetscFunctionBegin;
1046   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
1047   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1048       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
1049     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1050   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1051     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1052   } else {
1053     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   Destroys the parallel matrix once its reference count reaches zero,
   releasing the local-to-global mappings, row/column maps, both stashes,
   the two sequential blocks, the column map, the multiply scatter, and
   all auxiliary work arrays.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  /* other references remain; nothing to free yet */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is either a hash table or a plain array, per build option */
#if defined (USE_CTABLE)
  if (baij->colmap) TableDelete(baij->colmap);
#else
  if (baij->colmap) PetscFree(baij->colmap);
#endif
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1106 
1107 #undef __FUNC__
1108 #define __FUNC__ "MatMult_MPIBAIJ"
1109 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1110 {
1111   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1112   int         ierr, nt;
1113 
1114   PetscFunctionBegin;
1115   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1116   if (nt != a->n) {
1117     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1118   }
1119   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1120   if (nt != a->m) {
1121     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1122   }
1123   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1124   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1125   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1126   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1127   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNC__
1132 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1133 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1134 {
1135   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1136   int        ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1140   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1141   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1142   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNC__
1147 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1148 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1149 {
1150   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1151   int         ierr;
1152 
1153   PetscFunctionBegin;
1154   /* do nondiagonal part */
1155   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1156   /* send it on its way */
1157   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1158   /* do local part */
1159   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1160   /* receive remote parts: note this assumes the values are not actually */
1161   /* inserted in yy until the next line, which is true for my implementation*/
1162   /* but is not perhaps always true. */
1163   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1164   PetscFunctionReturn(0);
1165 }
1166 
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   Computes zz = A^T * xx + yy, overlapping the reverse scatter of the
   off-diagonal contribution with the local transpose multiply-add.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part: lvec = B^T * xx */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way (reverse scatter adds lvec into the owners' zz) */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part: zz = A^T * xx + yy */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1187 
1188 /*
1189   This only works correctly for square matrices where the subblock A->A is the
1190    diagonal block
1191 */
1192 #undef __FUNC__
1193 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1194 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1195 {
1196   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1197   int         ierr;
1198 
1199   PetscFunctionBegin;
1200   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1201   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNC__
1206 #define __FUNC__ "MatScale_MPIBAIJ"
1207 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1208 {
1209   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1210   int         ierr;
1211 
1212   PetscFunctionBegin;
1213   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1214   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNC__
1219 #define __FUNC__ "MatGetSize_MPIBAIJ"
1220 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1221 {
1222   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1223 
1224   PetscFunctionBegin;
1225   if (m) *m = mat->M;
1226   if (n) *n = mat->N;
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNC__
1231 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1232 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1233 {
1234   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1235 
1236   PetscFunctionBegin;
1237   *m = mat->m; *n = mat->n;
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNC__
1242 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1243 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1244 {
1245   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1246 
1247   PetscFunctionBegin;
1248   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1249   PetscFunctionReturn(0);
1250 }
1251 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns one locally-owned row of the matrix in global (unblocked)
   numbering: the row's values (*v) and/or global column indices (*idx)
   are assembled by merging the corresponding rows of the local diagonal
   block A and off-diagonal block B in increasing column order.  Work
   space is cached in mat->rowvalues until MatAssemblyEnd or destroy.
   Must be paired with MatRestoreRow_MPIBAIJ().
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time (shared work space) */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* combined block count of row i in both A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, then the index array behind them */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* pass NULL sub-requests when the caller did not ask for v and/or idx
     (B's columns are still needed for sorting when idx alone is wanted) */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes cstart;
         merged order is B[0..imark), all of A, then B[imark..nzB) */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the value pass above */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values not requested: compute imark here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1332 
1333 #undef __FUNC__
1334 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1335 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1336 {
1337   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1338 
1339   PetscFunctionBegin;
1340   if (baij->getrowactive == PETSC_FALSE) {
1341     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1342   }
1343   baij->getrowactive = PETSC_FALSE;
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNC__
1348 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1349 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1350 {
1351   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1352 
1353   PetscFunctionBegin;
1354   *bs = baij->bs;
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNC__
1359 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1360 int MatZeroEntries_MPIBAIJ(Mat A)
1361 {
1362   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1363   int         ierr;
1364 
1365   PetscFunctionBegin;
1366   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1367   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNC__
1372 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1373 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1374 {
1375   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1376   Mat         A = a->A, B = a->B;
1377   int         ierr;
1378   double      isend[5], irecv[5];
1379 
1380   PetscFunctionBegin;
1381   info->block_size     = (double)a->bs;
1382   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1383   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1384   isend[3] = info->memory;  isend[4] = info->mallocs;
1385   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1386   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1387   isend[3] += info->memory;  isend[4] += info->mallocs;
1388   if (flag == MAT_LOCAL) {
1389     info->nz_used      = isend[0];
1390     info->nz_allocated = isend[1];
1391     info->nz_unneeded  = isend[2];
1392     info->memory       = isend[3];
1393     info->mallocs      = isend[4];
1394   } else if (flag == MAT_GLOBAL_MAX) {
1395     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1396     info->nz_used      = irecv[0];
1397     info->nz_allocated = irecv[1];
1398     info->nz_unneeded  = irecv[2];
1399     info->memory       = irecv[3];
1400     info->mallocs      = irecv[4];
1401   } else if (flag == MAT_GLOBAL_SUM) {
1402     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1403     info->nz_used      = irecv[0];
1404     info->nz_allocated = irecv[1];
1405     info->nz_unneeded  = irecv[2];
1406     info->memory       = irecv[3];
1407     info->mallocs      = irecv[4];
1408   } else {
1409     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1410   }
1411   info->rows_global       = (double)a->M;
1412   info->columns_global    = (double)a->N;
1413   info->rows_local        = (double)a->m;
1414   info->columns_local     = (double)a->N;
1415   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1416   info->fill_ratio_needed = 0;
1417   info->factor_mallocs    = 0;
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNC__
1422 #define __FUNC__ "MatSetOption_MPIBAIJ"
1423 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1424 {
1425   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1426   int         ierr;
1427 
1428   PetscFunctionBegin;
1429   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1430       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1431       op == MAT_COLUMNS_UNSORTED ||
1432       op == MAT_COLUMNS_SORTED ||
1433       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1434       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1435         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1436         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1437   } else if (op == MAT_ROW_ORIENTED) {
1438         a->roworiented = 1;
1439         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1440         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1441   } else if (op == MAT_ROWS_SORTED ||
1442              op == MAT_ROWS_UNSORTED ||
1443              op == MAT_SYMMETRIC ||
1444              op == MAT_STRUCTURALLY_SYMMETRIC ||
1445              op == MAT_YES_NEW_DIAGONALS ||
1446              op == MAT_USE_HASH_TABLE) {
1447     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1448   } else if (op == MAT_COLUMN_ORIENTED) {
1449     a->roworiented = 0;
1450     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1451     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1452   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1453     a->donotstash = 1;
1454   } else if (op == MAT_NO_NEW_DIAGONALS) {
1455     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1456   } else if (op == MAT_USE_HASH_TABLE) {
1457     a->ht_flag = 1;
1458   } else {
1459     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1460   }
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNC__
1465 #define __FUNC__ "MatTranspose_MPIBAIJ("
1466 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1467 {
1468   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1469   Mat_SeqBAIJ *Aloc;
1470   Mat        B;
1471   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1472   int        bs=baij->bs,mbs=baij->mbs;
1473   Scalar     *a;
1474 
1475   PetscFunctionBegin;
1476   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1477   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1478 CHKERRQ(ierr);
1479 
1480   /* copy over the A part */
1481   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1482   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1483   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1484 
1485   for ( i=0; i<mbs; i++ ) {
1486     rvals[0] = bs*(baij->rstart + i);
1487     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1488     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1489       col = (baij->cstart+aj[j])*bs;
1490       for (k=0; k<bs; k++ ) {
1491         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1492         col++; a += bs;
1493       }
1494     }
1495   }
1496   /* copy over the B part */
1497   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1498   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1499   for ( i=0; i<mbs; i++ ) {
1500     rvals[0] = bs*(baij->rstart + i);
1501     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1502     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1503       col = baij->garray[aj[j]]*bs;
1504       for (k=0; k<bs; k++ ) {
1505         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1506         col++; a += bs;
1507       }
1508     }
1509   }
1510   PetscFree(rvals);
1511   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1512   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1513 
1514   if (matout != PETSC_NULL) {
1515     *matout = B;
1516   } else {
1517     PetscOps *Abops;
1518     MatOps   Aops;
1519 
1520     /* This isn't really an in-place transpose .... but free data structures from baij */
1521     PetscFree(baij->rowners);
1522     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1523     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1524 #if defined (USE_CTABLE)
1525     if (baij->colmap) TableDelete(baij->colmap);
1526 #else
1527     if (baij->colmap) PetscFree(baij->colmap);
1528 #endif
1529     if (baij->garray) PetscFree(baij->garray);
1530     if (baij->lvec) VecDestroy(baij->lvec);
1531     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1532     PetscFree(baij);
1533 
1534     /*
1535        This is horrible, horrible code. We need to keep the
1536       A pointers for the bops and ops but copy everything
1537       else from C.
1538     */
1539     Abops   = A->bops;
1540     Aops    = A->ops;
1541     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1542     A->bops = Abops;
1543     A->ops  = Aops;
1544 
1545     PetscHeaderDestroy(B);
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNC__
1551 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1552 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1553 {
1554   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1555   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1556   int ierr,s1,s2,s3;
1557 
1558   PetscFunctionBegin;
1559   if (ll)  {
1560     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1561     ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
1562     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1563     ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr);
1564     ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr);
1565   }
1566   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNC__
1571 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1572 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1573 {
1574   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1575   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1576   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1577   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1578   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1579   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1580   MPI_Comm       comm = A->comm;
1581   MPI_Request    *send_waits,*recv_waits;
1582   MPI_Status     recv_status,*send_status;
1583   IS             istmp;
1584 
1585   PetscFunctionBegin;
1586   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1587   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1588 
1589   /*  first count number of contributors to each processor */
1590   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1591   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1592  procs   = nprocs + size;
1593   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1594   for ( i=0; i<N; i++ ) {
1595     idx = rows[i];
1596     found = 0;
1597     for ( j=0; j<size; j++ ) {
1598       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1599         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1600       }
1601     }
1602     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1603   }
1604   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1605 
1606   /* inform other processors of number of messages and max length*/
1607   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
1608   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1609   nrecvs = work[rank];
1610   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1611   nmax   = work[rank];
1612   PetscFree(work);
1613 
1614   /* post receives:   */
1615   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1616   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1617   for ( i=0; i<nrecvs; i++ ) {
1618     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1619   }
1620 
1621   /* do sends:
1622      1) starts[i] gives the starting index in svalues for stuff going to
1623      the ith processor
1624   */
1625   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1626   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1627   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
1628   starts[0] = 0;
1629   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1630   for ( i=0; i<N; i++ ) {
1631     svalues[starts[owner[i]]++] = rows[i];
1632   }
1633   ISRestoreIndices(is,&rows);
1634 
1635   starts[0] = 0;
1636   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1637   count = 0;
1638   for ( i=0; i<size; i++ ) {
1639     if (procs[i]) {
1640       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1641     }
1642   }
1643   PetscFree(starts);
1644 
1645   base = owners[rank]*bs;
1646 
1647   /*  wait on receives */
1648   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
1649   source = lens + nrecvs;
1650   count  = nrecvs; slen = 0;
1651   while (count) {
1652     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1653     /* unpack receives into our local space */
1654     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1655     source[imdex]  = recv_status.MPI_SOURCE;
1656     lens[imdex]  = n;
1657     slen += n;
1658     count--;
1659   }
1660   PetscFree(recv_waits);
1661 
1662   /* move the data into the send scatter */
1663   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
1664   count = 0;
1665   for ( i=0; i<nrecvs; i++ ) {
1666     values = rvalues + i*nmax;
1667     for ( j=0; j<lens[i]; j++ ) {
1668       lrows[count++] = values[j] - base;
1669     }
1670   }
1671   PetscFree(rvalues); PetscFree(lens);
1672   PetscFree(owner); PetscFree(nprocs);
1673 
1674   /* actually zap the local rows */
1675   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1676   PLogObjectParent(A,istmp);
1677 
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
1686   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1687   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
1688   if (diag && (l->A->M == l->A->N)) {
1689     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
1690   } else if (diag) {
1691     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1692     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1693       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1694 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1695     }
1696     for ( i = 0; i < slen; i++ ) {
1697       row  = lrows[i] + rstart_bs;
1698       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1699     }
1700     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702   } else {
1703     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1704   }
1705 
1706   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1707   PetscFree(lrows);
1708 
1709   /* wait on sends */
1710   if (nsends) {
1711     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1712     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1713     PetscFree(send_status);
1714   }
1715   PetscFree(send_waits); PetscFree(svalues);
1716 
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNC__
1721 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1722 int MatPrintHelp_MPIBAIJ(Mat A)
1723 {
1724   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1725   MPI_Comm    comm = A->comm;
1726   static int  called = 0;
1727   int         ierr;
1728 
1729   PetscFunctionBegin;
1730   if (!a->rank) {
1731     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1732   }
1733   if (called) {PetscFunctionReturn(0);} else called = 1;
1734   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1735   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1736   PetscFunctionReturn(0);
1737 }
1738 
1739 #undef __FUNC__
1740 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1741 int MatSetUnfactored_MPIBAIJ(Mat A)
1742 {
1743   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1744   int         ierr;
1745 
1746   PetscFunctionBegin;
1747   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1752 
1753 #undef __FUNC__
1754 #define __FUNC__ "MatEqual_MPIBAIJ"
1755 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1756 {
1757   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1758   Mat         a, b, c, d;
1759   PetscTruth  flg;
1760   int         ierr;
1761 
1762   PetscFunctionBegin;
1763   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1764   a = matA->A; b = matA->B;
1765   c = matB->A; d = matB->B;
1766 
1767   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1768   if (flg == PETSC_TRUE) {
1769     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1770   }
1771   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1772   PetscFunctionReturn(0);
1773 }
1774 
/* -------------------------------------------------------------------*/
/*
    Function table (virtual dispatch) for the MPIBAIJ matrix type.  This
    is a positional initializer of struct _MatOps: each entry fills the
    corresponding operation slot, and a 0 entry marks an operation this
    format does not provide.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1843 
1844 
1845 EXTERN_C_BEGIN
1846 #undef __FUNC__
1847 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1848 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1849 {
1850   PetscFunctionBegin;
1851   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1852   *iscopy = PETSC_FALSE;
1853   PetscFunctionReturn(0);
1854 }
1855 EXTERN_C_END
1856 
#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  /* on one process (unless the parallel format is forced via the options
     above) create a sequential BAIJ matrix instead */
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* consistency checks on the combinations of local/global sizes supplied */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* if both local and global sizes are given, the local sizes take
     precedence: drop the global size and recompute it from the sum below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* sum the local sizes across the communicator to obtain any global size
     still left as PETSC_DECIDE */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* split the global block rows/columns as evenly as possible when the
     local sizes were left to PETSc */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; one allocation holds
     rowners, cowners and rowners_bs back to back */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  /* prefix-sum the gathered counts into ownership offsets */
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the local diagonal (m x n) and off-diagonal (m x N) blocks */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2124 
2125 #undef __FUNC__
2126 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2127 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2128 {
2129   Mat         mat;
2130   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2131   int         ierr, len=0, flg;
2132 
2133   PetscFunctionBegin;
2134   *newmat       = 0;
2135   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2136   PLogObjectCreate(mat);
2137   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2138   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2139   mat->ops->destroy = MatDestroy_MPIBAIJ;
2140   mat->ops->view    = MatView_MPIBAIJ;
2141   mat->factor       = matin->factor;
2142   mat->assembled    = PETSC_TRUE;
2143   mat->insertmode   = NOT_SET_VALUES;
2144 
2145   a->m = mat->m   = oldmat->m;
2146   a->n = mat->n   = oldmat->n;
2147   a->M = mat->M   = oldmat->M;
2148   a->N = mat->N   = oldmat->N;
2149 
2150   a->bs  = oldmat->bs;
2151   a->bs2 = oldmat->bs2;
2152   a->mbs = oldmat->mbs;
2153   a->nbs = oldmat->nbs;
2154   a->Mbs = oldmat->Mbs;
2155   a->Nbs = oldmat->Nbs;
2156 
2157   a->rstart       = oldmat->rstart;
2158   a->rend         = oldmat->rend;
2159   a->cstart       = oldmat->cstart;
2160   a->cend         = oldmat->cend;
2161   a->size         = oldmat->size;
2162   a->rank         = oldmat->rank;
2163   a->donotstash   = oldmat->donotstash;
2164   a->roworiented  = oldmat->roworiented;
2165   a->rowindices   = 0;
2166   a->rowvalues    = 0;
2167   a->getrowactive = PETSC_FALSE;
2168   a->barray       = 0;
2169   a->rstart_bs    = oldmat->rstart_bs;
2170   a->rend_bs      = oldmat->rend_bs;
2171   a->cstart_bs    = oldmat->cstart_bs;
2172   a->cend_bs      = oldmat->cend_bs;
2173 
2174   /* hash table stuff */
2175   a->ht           = 0;
2176   a->hd           = 0;
2177   a->ht_size      = 0;
2178   a->ht_flag      = oldmat->ht_flag;
2179   a->ht_fact      = oldmat->ht_fact;
2180   a->ht_total_ct  = 0;
2181   a->ht_insert_ct = 0;
2182 
2183 
2184   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2185   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2186   a->cowners    = a->rowners + a->size + 2;
2187   a->rowners_bs = a->cowners + a->size + 2;
2188   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2189   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2190   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2191   if (oldmat->colmap) {
2192 #if defined (USE_CTABLE)
2193   ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2194 #else
2195     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2196     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2197     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2198 #endif
2199   } else a->colmap = 0;
2200   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2201     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2202     PLogObjectMemory(mat,len*sizeof(int));
2203     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2204   } else a->garray = 0;
2205 
2206   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2207   PLogObjectParent(mat,a->lvec);
2208   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2209 
2210   PLogObjectParent(mat,a->Mvctx);
2211   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2212   PLogObjectParent(mat,a->A);
2213   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2214   PLogObjectParent(mat,a->B);
2215   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2216   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2217   if (flg) {
2218     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2219   }
2220   *newmat = mat;
2221   PetscFunctionReturn(0);
2222 }
2223 
2224 #include "sys.h"
2225 
2226 #undef __FUNC__
2227 #define __FUNC__ "MatLoad_MPIBAIJ"
2228 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2229 {
2230   Mat          A;
2231   int          i, nz, ierr, j,rstart, rend, fd;
2232   Scalar       *vals,*buf;
2233   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2234   MPI_Status   status;
2235   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2236   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2237   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2238   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2239   int          dcount,kmax,k,nzcount,tmp;
2240 
2241   PetscFunctionBegin;
2242   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2243 
2244   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2245   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2246   if (!rank) {
2247     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2248     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2249     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2250     if (header[3] < 0) {
2251       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2252     }
2253   }
2254 
2255   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2256   M = header[1]; N = header[2];
2257 
2258   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2259 
2260   /*
2261      This code adds extra rows to make sure the number of rows is
2262      divisible by the blocksize
2263   */
2264   Mbs        = M/bs;
2265   extra_rows = bs - M + bs*(Mbs);
2266   if (extra_rows == bs) extra_rows = 0;
2267   else                  Mbs++;
2268   if (extra_rows &&!rank) {
2269     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2270   }
2271 
2272   /* determine ownership of all rows */
2273   mbs = Mbs/size + ((Mbs % size) > rank);
2274   m   = mbs * bs;
2275   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2276   browners = rowners + size + 1;
2277   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2278   rowners[0] = 0;
2279   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2280   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2281   rstart = rowners[rank];
2282   rend   = rowners[rank+1];
2283 
2284   /* distribute row lengths to all processors */
2285   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2286   if (!rank) {
2287     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2288     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2289     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2290     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2291     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2292     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2293     PetscFree(sndcounts);
2294   } else {
2295     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2296   }
2297 
2298   if (!rank) {
2299     /* calculate the number of nonzeros on each processor */
2300     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2301     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2302     for ( i=0; i<size; i++ ) {
2303       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2304         procsnz[i] += rowlengths[j];
2305       }
2306     }
2307     PetscFree(rowlengths);
2308 
2309     /* determine max buffer needed and allocate it */
2310     maxnz = 0;
2311     for ( i=0; i<size; i++ ) {
2312       maxnz = PetscMax(maxnz,procsnz[i]);
2313     }
2314     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2315 
2316     /* read in my part of the matrix column indices  */
2317     nz = procsnz[0];
2318     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2319     mycols = ibuf;
2320     if (size == 1)  nz -= extra_rows;
2321     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2322     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2323 
2324     /* read in every ones (except the last) and ship off */
2325     for ( i=1; i<size-1; i++ ) {
2326       nz   = procsnz[i];
2327       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2328       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2329     }
2330     /* read in the stuff for the last proc */
2331     if ( size != 1 ) {
2332       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2333       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2334       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2335       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2336     }
2337     PetscFree(cols);
2338   } else {
2339     /* determine buffer space needed for message */
2340     nz = 0;
2341     for ( i=0; i<m; i++ ) {
2342       nz += locrowlens[i];
2343     }
2344     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2345     mycols = ibuf;
2346     /* receive message of column indices*/
2347     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2348     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2349     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2350   }
2351 
2352   /* loop over local rows, determining number of off diagonal entries */
2353   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2354   odlens = dlens + (rend-rstart);
2355   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2356   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2357   masked1 = mask    + Mbs;
2358   masked2 = masked1 + Mbs;
2359   rowcount = 0; nzcount = 0;
2360   for ( i=0; i<mbs; i++ ) {
2361     dcount  = 0;
2362     odcount = 0;
2363     for ( j=0; j<bs; j++ ) {
2364       kmax = locrowlens[rowcount];
2365       for ( k=0; k<kmax; k++ ) {
2366         tmp = mycols[nzcount++]/bs;
2367         if (!mask[tmp]) {
2368           mask[tmp] = 1;
2369           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2370           else masked1[dcount++] = tmp;
2371         }
2372       }
2373       rowcount++;
2374     }
2375 
2376     dlens[i]  = dcount;
2377     odlens[i] = odcount;
2378 
2379     /* zero out the mask elements we set */
2380     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2381     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2382   }
2383 
2384   /* create our matrix */
2385   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2386   A = *newmat;
2387   MatSetOption(A,MAT_COLUMNS_SORTED);
2388 
2389   if (!rank) {
2390     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2391     /* read in my part of the matrix numerical values  */
2392     nz = procsnz[0];
2393     vals = buf;
2394     mycols = ibuf;
2395     if (size == 1)  nz -= extra_rows;
2396     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2397     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2398 
2399     /* insert into matrix */
2400     jj      = rstart*bs;
2401     for ( i=0; i<m; i++ ) {
2402       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2403       mycols += locrowlens[i];
2404       vals   += locrowlens[i];
2405       jj++;
2406     }
2407     /* read in other processors (except the last one) and ship out */
2408     for ( i=1; i<size-1; i++ ) {
2409       nz   = procsnz[i];
2410       vals = buf;
2411       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2412       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2413     }
2414     /* the last proc */
2415     if ( size != 1 ){
2416       nz   = procsnz[i] - extra_rows;
2417       vals = buf;
2418       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2419       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2420       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2421     }
2422     PetscFree(procsnz);
2423   } else {
2424     /* receive numeric values */
2425     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2426 
2427     /* receive message of values*/
2428     vals   = buf;
2429     mycols = ibuf;
2430     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2431     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2432     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2433 
2434     /* insert into matrix */
2435     jj      = rstart*bs;
2436     for ( i=0; i<m; i++ ) {
2437       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2438       mycols += locrowlens[i];
2439       vals   += locrowlens[i];
2440       jj++;
2441     }
2442   }
2443   PetscFree(locrowlens);
2444   PetscFree(buf);
2445   PetscFree(ibuf);
2446   PetscFree(rowners);
2447   PetscFree(dlens);
2448   PetscFree(mask);
2449   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2450   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 
2455 
2456 #undef __FUNC__
2457 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2458 /*@
2459    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2460 
2461    Input Parameters:
2462 .  mat  - the matrix
2463 .  fact - factor
2464 
2465    Collective on Mat
2466 
2467    Level: advanced
2468 
2469   Notes:
2470    This can also be set by the command line option: -mat_use_hash_table fact
2471 
2472 .keywords: matrix, hashtable, factor, HT
2473 
2474 .seealso: MatSetOption()
2475 @*/
2476 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2477 {
2478   Mat_MPIBAIJ *baij;
2479 
2480   PetscFunctionBegin;
2481   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2482   if (mat->type != MATMPIBAIJ) {
2483       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2484   }
2485   baij = (Mat_MPIBAIJ*) mat->data;
2486   baij->ht_fact = fact;
2487   PetscFunctionReturn(0);
2488 }
2489