xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 8561465186c67854baea16c23ffe7637fc6ef9f3)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.172 1999/06/10 16:08:12 balay Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
48 
49 /*
50      Local utility routine that creates a mapping from the global column
51    number to the local number in the off-diagonal part of the local
52    storage of the matrix.  This is done in a non scable way since the
53    length of colmap equals the global matrix length.
54 */
55 #undef __FUNC__
56 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
57 static int CreateColmap_MPIBAIJ_Private(Mat mat)
58 {
59   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
60   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
61   int         nbs = B->nbs,i,bs=B->bs,ierr;
62 
63   PetscFunctionBegin;
64 #if defined (PETSC_USE_CTABLE)
65   ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
66   for ( i=0; i<nbs; i++ ){
67     ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
68   }
69 #else
70   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
71   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
72   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
73   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
74 #endif
75   PetscFunctionReturn(0);
76 }
77 
78 #define CHUNKSIZE  10
79 
80 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
81 { \
82  \
83     brow = row/bs;  \
84     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
85     rmax = aimax[brow]; nrow = ailen[brow]; \
86       bcol = col/bs; \
87       ridx = row % bs; cidx = col % bs; \
88       low = 0; high = nrow; \
89       while (high-low > 3) { \
90         t = (low+high)/2; \
91         if (rp[t] > bcol) high = t; \
92         else              low  = t; \
93       } \
94       for ( _i=low; _i<high; _i++ ) { \
95         if (rp[_i] > bcol) break; \
96         if (rp[_i] == bcol) { \
97           bap  = ap +  bs2*_i + bs*cidx + ridx; \
98           if (addv == ADD_VALUES) *bap += value;  \
99           else                    *bap  = value;  \
100           goto a_noinsert; \
101         } \
102       } \
103       if (a->nonew == 1) goto a_noinsert; \
104       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
105       if (nrow >= rmax) { \
106         /* there is no extra room in row, therefore enlarge */ \
107         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
108         Scalar *new_a; \
109  \
110         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
111  \
112         /* malloc new storage space */ \
113         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
114         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
115         new_j   = (int *) (new_a + bs2*new_nz); \
116         new_i   = new_j + new_nz; \
117  \
118         /* copy over old data into new slots */ \
119         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
120         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
121         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
122         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
123         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
124                           len*sizeof(int));CHKERRQ(ierr); \
125         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
126         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
127         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
128                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
129         /* free up old matrix storage */ \
130         PetscFree(a->a);  \
131         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = 1; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for ( ii=N; ii>=_i; ii-- ) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
154 
155 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
156 { \
157  \
158     brow = row/bs;  \
159     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
160     rmax = bimax[brow]; nrow = bilen[brow]; \
161       bcol = col/bs; \
162       ridx = row % bs; cidx = col % bs; \
163       low = 0; high = nrow; \
164       while (high-low > 3) { \
165         t = (low+high)/2; \
166         if (rp[t] > bcol) high = t; \
167         else              low  = t; \
168       } \
169       for ( _i=low; _i<high; _i++ ) { \
170         if (rp[_i] > bcol) break; \
171         if (rp[_i] == bcol) { \
172           bap  = ap +  bs2*_i + bs*cidx + ridx; \
173           if (addv == ADD_VALUES) *bap += value;  \
174           else                    *bap  = value;  \
175           goto b_noinsert; \
176         } \
177       } \
178       if (b->nonew == 1) goto b_noinsert; \
179       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
180       if (nrow >= rmax) { \
181         /* there is no extra room in row, therefore enlarge */ \
182         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
183         Scalar *new_a; \
184  \
185         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
186  \
187         /* malloc new storage space */ \
188         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
189         new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
190         new_j   = (int *) (new_a + bs2*new_nz); \
191         new_i   = new_j + new_nz; \
192  \
193         /* copy over old data into new slots */ \
194         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
195         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
196         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
197         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
198         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
199                            len*sizeof(int));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
201         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
202         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
203                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
204         /* free up old matrix storage */ \
205         PetscFree(b->a);  \
206         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = 1; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for ( ii=N; ii>=_i; ii-- ) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 
230 #undef __FUNC__
231 #define __FUNC__ "MatSetValues_MPIBAIJ"
232 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
233 {
234   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
235   Scalar      value;
236   int         ierr,i,j,row,col;
237   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
238   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
239   int         cend_orig=baij->cend_bs,bs=baij->bs;
240 
241   /* Some Variables required in the macro */
242   Mat         A = baij->A;
243   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
244   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
245   Scalar      *aa=a->a;
246 
247   Mat         B = baij->B;
248   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
249   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
250   Scalar      *ba=b->a;
251 
252   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
253   int         low,high,t,ridx,cidx,bs2=a->bs2;
254   Scalar      *ap,*bap;
255 
256   PetscFunctionBegin;
257   for ( i=0; i<m; i++ ) {
258     if (im[i] < 0) continue;
259 #if defined(PETSC_USE_BOPT_g)
260     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
261 #endif
262     if (im[i] >= rstart_orig && im[i] < rend_orig) {
263       row = im[i] - rstart_orig;
264       for ( j=0; j<n; j++ ) {
265         if (in[j] >= cstart_orig && in[j] < cend_orig){
266           col = in[j] - cstart_orig;
267           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
268           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
269           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
270         } else if (in[j] < 0) continue;
271 #if defined(PETSC_USE_BOPT_g)
272         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
273 #endif
274         else {
275           if (mat->was_assembled) {
276             if (!baij->colmap) {
277               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
278             }
279 #if defined (PETSC_USE_CTABLE)
280             ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
281             col  = col - 1 + in[j]%bs;
282 #else
283             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
284 #endif
285             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
286               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
287               col =  in[j];
288               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
289               B = baij->B;
290               b = (Mat_SeqBAIJ *) (B)->data;
291               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
292               ba=b->a;
293             }
294           } else col = in[j];
295           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
296           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
297           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
298         }
299       }
300     } else {
301       if ( !baij->donotstash) {
302         if (roworiented) {
303           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
304         } else {
305           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
306         }
307       }
308     }
309   }
310   PetscFunctionReturn(0);
311 }
312 
313 #undef __FUNC__
314 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
315 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
316 {
317   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
318   Scalar      *value,*barray=baij->barray;
319   int         ierr,i,j,ii,jj,row,col;
320   int         roworiented = baij->roworiented,rstart=baij->rstart ;
321   int         rend=baij->rend,cstart=baij->cstart,stepval;
322   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
323 
324   if(!barray) {
325     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
326   }
327 
328   if (roworiented) {
329     stepval = (n-1)*bs;
330   } else {
331     stepval = (m-1)*bs;
332   }
333   for ( i=0; i<m; i++ ) {
334     if (im[i] < 0) continue;
335 #if defined(PETSC_USE_BOPT_g)
336     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
337 #endif
338     if (im[i] >= rstart && im[i] < rend) {
339       row = im[i] - rstart;
340       for ( j=0; j<n; j++ ) {
341         /* If NumCol = 1 then a copy is not required */
342         if ((roworiented) && (n == 1)) {
343           barray = v + i*bs2;
344         } else if((!roworiented) && (m == 1)) {
345           barray = v + j*bs2;
346         } else { /* Here a copy is required */
347           if (roworiented) {
348             value = v + i*(stepval+bs)*bs + j*bs;
349           } else {
350             value = v + j*(stepval+bs)*bs + i*bs;
351           }
352           for ( ii=0; ii<bs; ii++,value+=stepval ) {
353             for (jj=0; jj<bs; jj++ ) {
354               *barray++  = *value++;
355             }
356           }
357           barray -=bs2;
358         }
359 
360         if (in[j] >= cstart && in[j] < cend){
361           col  = in[j] - cstart;
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364         else if (in[j] < 0) continue;
365 #if defined(PETSC_USE_BOPT_g)
366         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
367 #endif
368         else {
369           if (mat->was_assembled) {
370             if (!baij->colmap) {
371               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
372             }
373 
374 #if defined(PETSC_USE_BOPT_g)
375 #if defined (PETSC_USE_CTABLE)
376             { int data;
377             ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
378             if((data - 1) % bs)
379 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
380             }
381 #else
382             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
383 #endif
384 #endif
385 #if defined (PETSC_USE_CTABLE)
386 	    ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
387             col  = (col - 1)/bs;
388 #else
389             col = (baij->colmap[in[j]] - 1)/bs;
390 #endif
391             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394             }
395           }
396           else col = in[j];
397           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
398         }
399       }
400     } else {
401       if (!baij->donotstash) {
402         if (roworiented) {
403           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
404         } else {
405           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
406         }
407       }
408     }
409   }
410   PetscFunctionReturn(0);
411 }
412 #define HASH_KEY 0.6180339887
413 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
414 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
415 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
416 #undef __FUNC__
417 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
418 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
419 {
420   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
421   int         ierr,i,j,row,col;
422   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
423   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
424   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
425   double      tmp;
426   Scalar      ** HD = baij->hd,value;
427 #if defined(PETSC_USE_BOPT_g)
428   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
429 #endif
430 
431   PetscFunctionBegin;
432 
433   for ( i=0; i<m; i++ ) {
434 #if defined(PETSC_USE_BOPT_g)
435     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
436     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
437 #endif
438       row = im[i];
439     if (row >= rstart_orig && row < rend_orig) {
440       for ( j=0; j<n; j++ ) {
441         col = in[j];
442         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
443         /* Look up into the Hash Table */
444         key = (row/bs)*Nbs+(col/bs)+1;
445         h1  = HASH(size,key,tmp);
446 
447 
448         idx = h1;
449 #if defined(PETSC_USE_BOPT_g)
450         insert_ct++;
451         total_ct++;
452         if (HT[idx] != key) {
453           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
454           if (idx == size) {
455             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
456             if (idx == h1) {
457               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
458             }
459           }
460         }
461 #else
462         if (HT[idx] != key) {
463           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
464           if (idx == size) {
465             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
466             if (idx == h1) {
467               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
468             }
469           }
470         }
471 #endif
472         /* A HASH table entry is found, so insert the values at the correct address */
473         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
474         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
475       }
476     } else {
477       if (!baij->donotstash) {
478         if (roworiented) {
479           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
480         } else {
481           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
482         }
483       }
484     }
485   }
486 #if defined(PETSC_USE_BOPT_g)
487   baij->ht_total_ct = total_ct;
488   baij->ht_insert_ct = insert_ct;
489 #endif
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNC__
494 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
495 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
496 {
497   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
498   int         ierr,i,j,ii,jj,row,col;
499   int         roworiented = baij->roworiented,rstart=baij->rstart ;
500   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
501   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
502   double      tmp;
503   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
504 #if defined(PETSC_USE_BOPT_g)
505   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
506 #endif
507 
508   PetscFunctionBegin;
509 
510   if (roworiented) {
511     stepval = (n-1)*bs;
512   } else {
513     stepval = (m-1)*bs;
514   }
515   for ( i=0; i<m; i++ ) {
516 #if defined(PETSC_USE_BOPT_g)
517     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
518     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
519 #endif
520     row   = im[i];
521     v_t   = v + i*bs2;
522     if (row >= rstart && row < rend) {
523       for ( j=0; j<n; j++ ) {
524         col = in[j];
525 
526         /* Look up into the Hash Table */
527         key = row*Nbs+col+1;
528         h1  = HASH(size,key,tmp);
529 
530         idx = h1;
531 #if defined(PETSC_USE_BOPT_g)
532         total_ct++;
533         insert_ct++;
534        if (HT[idx] != key) {
535           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
536           if (idx == size) {
537             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
538             if (idx == h1) {
539               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
540             }
541           }
542         }
543 #else
544         if (HT[idx] != key) {
545           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
546           if (idx == size) {
547             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
548             if (idx == h1) {
549               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
550             }
551           }
552         }
553 #endif
554         baij_a = HD[idx];
555         if (roworiented) {
556           /*value = v + i*(stepval+bs)*bs + j*bs;*/
557           /* value = v + (i*(stepval+bs)+j)*bs; */
558           value = v_t;
559           v_t  += bs;
560           if (addv == ADD_VALUES) {
561             for ( ii=0; ii<bs; ii++,value+=stepval) {
562               for ( jj=ii; jj<bs2; jj+=bs ) {
563                 baij_a[jj]  += *value++;
564               }
565             }
566           } else {
567             for ( ii=0; ii<bs; ii++,value+=stepval) {
568               for ( jj=ii; jj<bs2; jj+=bs ) {
569                 baij_a[jj]  = *value++;
570               }
571             }
572           }
573         } else {
574           value = v + j*(stepval+bs)*bs + i*bs;
575           if (addv == ADD_VALUES) {
576             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
577               for ( jj=0; jj<bs; jj++ ) {
578                 baij_a[jj]  += *value++;
579               }
580             }
581           } else {
582             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
583               for ( jj=0; jj<bs; jj++ ) {
584                 baij_a[jj]  = *value++;
585               }
586             }
587           }
588         }
589       }
590     } else {
591       if (!baij->donotstash) {
592         if (roworiented) {
593           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
594         } else {
595           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
596         }
597       }
598     }
599   }
600 #if defined(PETSC_USE_BOPT_g)
601   baij->ht_total_ct = total_ct;
602   baij->ht_insert_ct = insert_ct;
603 #endif
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNC__
608 #define __FUNC__ "MatGetValues_MPIBAIJ"
609 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
610 {
611   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
612   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
613   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
614 
615   PetscFunctionBegin;
616   for ( i=0; i<m; i++ ) {
617     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
618     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
619     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
620       row = idxm[i] - bsrstart;
621       for ( j=0; j<n; j++ ) {
622         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
623         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
624         if (idxn[j] >= bscstart && idxn[j] < bscend){
625           col = idxn[j] - bscstart;
626           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
627         } else {
628           if (!baij->colmap) {
629             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
630           }
631 #if defined (PETSC_USE_CTABLE)
632           ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
633           data --;
634 #else
635           data = baij->colmap[idxn[j]/bs]-1;
636 #endif
637           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
638           else {
639             col  = data + idxn[j]%bs;
640             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
641           }
642         }
643       }
644     } else {
645       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
646     }
647   }
648  PetscFunctionReturn(0);
649 }
650 
651 #undef __FUNC__
652 #define __FUNC__ "MatNorm_MPIBAIJ"
653 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
654 {
655   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
656   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
657   int        ierr, i,bs2=baij->bs2;
658   double     sum = 0.0;
659   Scalar     *v;
660 
661   PetscFunctionBegin;
662   if (baij->size == 1) {
663     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
664   } else {
665     if (type == NORM_FROBENIUS) {
666       v = amat->a;
667       for (i=0; i<amat->nz*bs2; i++ ) {
668 #if defined(PETSC_USE_COMPLEX)
669         sum += PetscReal(PetscConj(*v)*(*v)); v++;
670 #else
671         sum += (*v)*(*v); v++;
672 #endif
673       }
674       v = bmat->a;
675       for (i=0; i<bmat->nz*bs2; i++ ) {
676 #if defined(PETSC_USE_COMPLEX)
677         sum += PetscReal(PetscConj(*v)*(*v)); v++;
678 #else
679         sum += (*v)*(*v); v++;
680 #endif
681       }
682       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
683       *norm = sqrt(*norm);
684     } else {
685       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
686     }
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 
692 /*
693   Creates the hash table, and sets the table
694   This table is created only once.
695   If new entried need to be added to the matrix
696   then the hash table has to be destroyed and
697   recreated.
698 */
699 #undef __FUNC__
700 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
701 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
702 {
703   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
704   Mat         A = baij->A, B=baij->B;
705   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
706   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
707   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
708   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
709   int         *HT,key;
710   Scalar      **HD;
711   double      tmp;
712 #if defined(PETSC_USE_BOPT_g)
713   int         ct=0,max=0;
714 #endif
715 
716   PetscFunctionBegin;
717   baij->ht_size=(int)(factor*nz);
718   size = baij->ht_size;
719 
720   if (baij->ht) {
721     PetscFunctionReturn(0);
722   }
723 
724   /* Allocate Memory for Hash Table */
725   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
726   baij->ht = (int*)(baij->hd + size);
727   HD = baij->hd;
728   HT = baij->ht;
729 
730 
731   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
732 
733 
734   /* Loop Over A */
735   for ( i=0; i<a->mbs; i++ ) {
736     for ( j=ai[i]; j<ai[i+1]; j++ ) {
737       row = i+rstart;
738       col = aj[j]+cstart;
739 
740       key = row*Nbs + col + 1;
741       h1  = HASH(size,key,tmp);
742       for ( k=0; k<size; k++ ){
743         if (HT[(h1+k)%size] == 0.0) {
744           HT[(h1+k)%size] = key;
745           HD[(h1+k)%size] = a->a + j*bs2;
746           break;
747 #if defined(PETSC_USE_BOPT_g)
748         } else {
749           ct++;
750 #endif
751         }
752       }
753 #if defined(PETSC_USE_BOPT_g)
754       if (k> max) max = k;
755 #endif
756     }
757   }
758   /* Loop Over B */
759   for ( i=0; i<b->mbs; i++ ) {
760     for ( j=bi[i]; j<bi[i+1]; j++ ) {
761       row = i+rstart;
762       col = garray[bj[j]];
763       key = row*Nbs + col + 1;
764       h1  = HASH(size,key,tmp);
765       for ( k=0; k<size; k++ ){
766         if (HT[(h1+k)%size] == 0.0) {
767           HT[(h1+k)%size] = key;
768           HD[(h1+k)%size] = b->a + j*bs2;
769           break;
770 #if defined(PETSC_USE_BOPT_g)
771         } else {
772           ct++;
773 #endif
774         }
775       }
776 #if defined(PETSC_USE_BOPT_g)
777       if (k> max) max = k;
778 #endif
779     }
780   }
781 
782   /* Print Summary */
783 #if defined(PETSC_USE_BOPT_g)
784   for ( i=0,j=0; i<size; i++)
785     if (HT[i]) {j++;}
786   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
787            (j== 0)? 0.0:((double)(ct+j))/j,max);
788 #endif
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNC__
793 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
794 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
795 {
796   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
797   int         ierr,nstash,reallocs;
798   InsertMode  addv;
799 
800   PetscFunctionBegin;
801   if (baij->donotstash) {
802     PetscFunctionReturn(0);
803   }
804 
805   /* make sure all processors are either in INSERTMODE or ADDMODE */
806   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) {
808     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
809   }
810   mat->insertmode = addv; /* in case this processor had no cache */
811 
812   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
813   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
815   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
816   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
817   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNC__
822 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
823 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
824 {
825   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
826   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
827   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
828   int         *row,*col,other_disassembled,r1,r2,r3;
829   Scalar      *val;
830   InsertMode  addv = mat->insertmode;
831 
832   PetscFunctionBegin;
833   if (!baij->donotstash) {
834     while (1) {
835       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
836       if (!flg) break;
837 
838       for ( i=0; i<n; ) {
839         /* Now identify the consecutive vals belonging to the same row */
840         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
841         if (j < n) ncols = j-i;
842         else       ncols = n-i;
843         /* Now assemble all these values with a single function call */
844         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
845         i = j;
846       }
847     }
848     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
849     /* Now process the block-stash. Since the values are stashed column-oriented,
850        set the roworiented flag to column oriented, and after MatSetValues()
851        restore the original flags */
852     r1 = baij->roworiented;
853     r2 = a->roworiented;
854     r3 = b->roworiented;
855     baij->roworiented = 0;
856     a->roworiented    = 0;
857     b->roworiented    = 0;
858     while (1) {
859       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
860       if (!flg) break;
861 
862       for ( i=0; i<n; ) {
863         /* Now identify the consecutive vals belonging to the same row */
864         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
865         if (j < n) ncols = j-i;
866         else       ncols = n-i;
867         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
868         i = j;
869       }
870     }
871     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
872     baij->roworiented = r1;
873     a->roworiented    = r2;
874     b->roworiented    = r3;
875   }
876 
877   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
878   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
879 
880   /* determine if any processor has disassembled, if so we must
881      also disassemble ourselfs, in order that we may reassemble. */
882   /*
883      if nonzero structure of submatrix B cannot change then we know that
884      no processor disassembled thus we can skip this stuff
885   */
886   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
887     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
888     if (mat->was_assembled && !other_disassembled) {
889       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
890     }
891   }
892 
893   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
894     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
895   }
896   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
897   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
898 
899 #if defined(PETSC_USE_BOPT_g)
900   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
901     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
902              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
903     baij->ht_total_ct  = 0;
904     baij->ht_insert_ct = 0;
905   }
906 #endif
907   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
908     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
909     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
910     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
911   }
912 
913   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
914   PetscFunctionReturn(0);
915 }
916 
917 /*
918 #undef __FUNC__
919 #define __FUNC__ "MatView_MPIBAIJ_Binary"
920 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
921 {
922   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
923   int          ierr;
924 
925   PetscFunctionBegin;
926   if (baij->size == 1) {
927     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
928   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
929   PetscFunctionReturn(0);
930 }
931 */
932 
933 #undef __FUNC__
934 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
935 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
936 {
937   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
938   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
939   FILE         *fd;
940   ViewerType   vtype;
941 
942   PetscFunctionBegin;
943   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
944   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
945     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
946     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
947       MatInfo info;
948       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
949       ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
950       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
951       ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
952       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
953               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
954               baij->bs,(int)info.memory);
955       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
956       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
957       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
958       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
959       fflush(fd);
960       ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
961       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
962       PetscFunctionReturn(0);
963     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
964       ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
965       PetscFunctionReturn(0);
966     }
967   }
968 
969   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
970     Draw       draw;
971     PetscTruth isnull;
972     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
973     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
974   }
975 
976   if (size == 1) {
977     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
978   } else {
979     /* assemble the entire matrix onto first processor. */
980     Mat         A;
981     Mat_SeqBAIJ *Aloc;
982     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
983     int         mbs = baij->mbs;
984     Scalar      *a;
985 
986     if (!rank) {
987       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
988     } else {
989       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
990     }
991     PLogObjectParent(mat,A);
992 
993     /* copy over the A part */
994     Aloc = (Mat_SeqBAIJ*) baij->A->data;
995     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
996     rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
997 
998     for ( i=0; i<mbs; i++ ) {
999       rvals[0] = bs*(baij->rstart + i);
1000       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1001       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1002         col = (baij->cstart+aj[j])*bs;
1003         for (k=0; k<bs; k++ ) {
1004           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1005           col++; a += bs;
1006         }
1007       }
1008     }
1009     /* copy over the B part */
1010     Aloc = (Mat_SeqBAIJ*) baij->B->data;
1011     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1012     for ( i=0; i<mbs; i++ ) {
1013       rvals[0] = bs*(baij->rstart + i);
1014       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1015       for ( j=ai[i]; j<ai[i+1]; j++ ) {
1016         col = baij->garray[aj[j]]*bs;
1017         for (k=0; k<bs; k++ ) {
1018           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1019           col++; a += bs;
1020         }
1021       }
1022     }
1023     PetscFree(rvals);
1024     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1025     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1026     /*
1027        Everyone has to call to draw the matrix since the graphics waits are
1028        synchronized across all processors that share the Draw object
1029     */
1030     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
1031       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
1032     }
1033     ierr = MatDestroy(A);CHKERRQ(ierr);
1034   }
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 
1039 
1040 #undef __FUNC__
1041 #define __FUNC__ "MatView_MPIBAIJ"
1042 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1043 {
1044   int         ierr;
1045   ViewerType  vtype;
1046 
1047   PetscFunctionBegin;
1048   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
1049   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1050       PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) {
1051     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1052     /*
1053   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1054     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1055     */
1056   } else {
1057     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1058   }
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 #undef __FUNC__
1063 #define __FUNC__ "MatDestroy_MPIBAIJ"
1064 int MatDestroy_MPIBAIJ(Mat mat)
1065 {
1066   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1067   int         ierr;
1068 
1069   PetscFunctionBegin;
1070   if (--mat->refct > 0) PetscFunctionReturn(0);
1071 
1072   if (mat->mapping) {
1073     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1074   }
1075   if (mat->bmapping) {
1076     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1077   }
1078   if (mat->rmap) {
1079     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1080   }
1081   if (mat->cmap) {
1082     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1083   }
1084 #if defined(PETSC_USE_LOG)
1085   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1086 #endif
1087 
1088   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1089   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1090 
1091   PetscFree(baij->rowners);
1092   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1093   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1094 #if defined (PETSC_USE_CTABLE)
1095   if (baij->colmap) TableDelete(baij->colmap);
1096 #else
1097   if (baij->colmap) PetscFree(baij->colmap);
1098 #endif
1099   if (baij->garray) PetscFree(baij->garray);
1100   if (baij->lvec)   VecDestroy(baij->lvec);
1101   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1102   if (baij->rowvalues) PetscFree(baij->rowvalues);
1103   if (baij->barray) PetscFree(baij->barray);
1104   if (baij->hd) PetscFree(baij->hd);
1105   PetscFree(baij);
1106   PLogObjectDestroy(mat);
1107   PetscHeaderDestroy(mat);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNC__
1112 #define __FUNC__ "MatMult_MPIBAIJ"
1113 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1114 {
1115   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1116   int         ierr, nt;
1117 
1118   PetscFunctionBegin;
1119   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1120   if (nt != a->n) {
1121     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1122   }
1123   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1124   if (nt != a->m) {
1125     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1126   }
1127   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1128   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1129   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1130   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1131   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNC__
1136 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1137 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1138 {
1139   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1140   int        ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1144   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1145   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1146   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNC__
1151 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1152 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1153 {
1154   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1155   int         ierr;
1156 
1157   PetscFunctionBegin;
1158   /* do nondiagonal part */
1159   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1160   /* send it on its way */
1161   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1162   /* do local part */
1163   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1164   /* receive remote parts: note this assumes the values are not actually */
1165   /* inserted in yy until the next line, which is true for my implementation*/
1166   /* but is not perhaps always true. */
1167   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNC__
1172 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1173 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1174 {
1175   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1176   int         ierr;
1177 
1178   PetscFunctionBegin;
1179   /* do nondiagonal part */
1180   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1181   /* send it on its way */
1182   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1183   /* do local part */
1184   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1185   /* receive remote parts: note this assumes the values are not actually */
1186   /* inserted in yy until the next line, which is true for my implementation*/
1187   /* but is not perhaps always true. */
1188   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 /*
1193   This only works correctly for square matrices where the subblock A->A is the
1194    diagonal block
1195 */
1196 #undef __FUNC__
1197 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1198 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1199 {
1200   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1201   int         ierr;
1202 
1203   PetscFunctionBegin;
1204   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1205   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNC__
1210 #define __FUNC__ "MatScale_MPIBAIJ"
1211 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1212 {
1213   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1214   int         ierr;
1215 
1216   PetscFunctionBegin;
1217   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1218   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 #undef __FUNC__
1223 #define __FUNC__ "MatGetSize_MPIBAIJ"
1224 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1225 {
1226   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1227 
1228   PetscFunctionBegin;
1229   if (m) *m = mat->M;
1230   if (n) *n = mat->N;
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNC__
1235 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1236 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1237 {
1238   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1239 
1240   PetscFunctionBegin;
1241   *m = mat->m; *n = mat->n;
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNC__
1246 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1247 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1248 {
1249   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1250 
1251   PetscFunctionBegin;
1252   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNC__
1257 #define __FUNC__ "MatGetRow_MPIBAIJ"
1258 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1259 {
1260   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1261   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1262   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1263   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1264   int        *cmap, *idx_p,cstart = mat->cstart;
1265 
1266   PetscFunctionBegin;
1267   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1268   mat->getrowactive = PETSC_TRUE;
1269 
1270   if (!mat->rowvalues && (idx || v)) {
1271     /*
1272         allocate enough space to hold information from the longest row.
1273     */
1274     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1275     int     max = 1,mbs = mat->mbs,tmp;
1276     for ( i=0; i<mbs; i++ ) {
1277       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1278       if (max < tmp) { max = tmp; }
1279     }
1280     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1281     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1282   }
1283 
1284   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1285   lrow = row - brstart;
1286 
1287   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1288   if (!v)   {pvA = 0; pvB = 0;}
1289   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1290   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1291   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1292   nztot = nzA + nzB;
1293 
1294   cmap  = mat->garray;
1295   if (v  || idx) {
1296     if (nztot) {
1297       /* Sort by increasing column numbers, assuming A and B already sorted */
1298       int imark = -1;
1299       if (v) {
1300         *v = v_p = mat->rowvalues;
1301         for ( i=0; i<nzB; i++ ) {
1302           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1303           else break;
1304         }
1305         imark = i;
1306         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1307         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1308       }
1309       if (idx) {
1310         *idx = idx_p = mat->rowindices;
1311         if (imark > -1) {
1312           for ( i=0; i<imark; i++ ) {
1313             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1314           }
1315         } else {
1316           for ( i=0; i<nzB; i++ ) {
1317             if (cmap[cworkB[i]/bs] < cstart)
1318               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1319             else break;
1320           }
1321           imark = i;
1322         }
1323         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1324         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1325       }
1326     } else {
1327       if (idx) *idx = 0;
1328       if (v)   *v   = 0;
1329     }
1330   }
1331   *nz = nztot;
1332   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1333   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNC__
1338 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1339 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1340 {
1341   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1342 
1343   PetscFunctionBegin;
1344   if (baij->getrowactive == PETSC_FALSE) {
1345     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1346   }
1347   baij->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNC__
1352 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1353 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1354 {
1355   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1356 
1357   PetscFunctionBegin;
1358   *bs = baij->bs;
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNC__
1363 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1364 int MatZeroEntries_MPIBAIJ(Mat A)
1365 {
1366   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1367   int         ierr;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1371   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNC__
1376 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1377 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1378 {
1379   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1380   Mat         A = a->A, B = a->B;
1381   int         ierr;
1382   double      isend[5], irecv[5];
1383 
1384   PetscFunctionBegin;
1385   info->block_size     = (double)a->bs;
1386   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1387   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1388   isend[3] = info->memory;  isend[4] = info->mallocs;
1389   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1390   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1391   isend[3] += info->memory;  isend[4] += info->mallocs;
1392   if (flag == MAT_LOCAL) {
1393     info->nz_used      = isend[0];
1394     info->nz_allocated = isend[1];
1395     info->nz_unneeded  = isend[2];
1396     info->memory       = isend[3];
1397     info->mallocs      = isend[4];
1398   } else if (flag == MAT_GLOBAL_MAX) {
1399     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1400     info->nz_used      = irecv[0];
1401     info->nz_allocated = irecv[1];
1402     info->nz_unneeded  = irecv[2];
1403     info->memory       = irecv[3];
1404     info->mallocs      = irecv[4];
1405   } else if (flag == MAT_GLOBAL_SUM) {
1406     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1407     info->nz_used      = irecv[0];
1408     info->nz_allocated = irecv[1];
1409     info->nz_unneeded  = irecv[2];
1410     info->memory       = irecv[3];
1411     info->mallocs      = irecv[4];
1412   } else {
1413     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1414   }
1415   info->rows_global       = (double)a->M;
1416   info->columns_global    = (double)a->N;
1417   info->rows_local        = (double)a->m;
1418   info->columns_local     = (double)a->N;
1419   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1420   info->fill_ratio_needed = 0;
1421   info->factor_mallocs    = 0;
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNC__
1426 #define __FUNC__ "MatSetOption_MPIBAIJ"
1427 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1428 {
1429   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1430   int         ierr;
1431 
1432   PetscFunctionBegin;
1433   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1434       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1435       op == MAT_COLUMNS_UNSORTED ||
1436       op == MAT_COLUMNS_SORTED ||
1437       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1438       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1439         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1440         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1441   } else if (op == MAT_ROW_ORIENTED) {
1442         a->roworiented = 1;
1443         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1444         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1445   } else if (op == MAT_ROWS_SORTED ||
1446              op == MAT_ROWS_UNSORTED ||
1447              op == MAT_SYMMETRIC ||
1448              op == MAT_STRUCTURALLY_SYMMETRIC ||
1449              op == MAT_YES_NEW_DIAGONALS ||
1450              op == MAT_USE_HASH_TABLE) {
1451     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1452   } else if (op == MAT_COLUMN_ORIENTED) {
1453     a->roworiented = 0;
1454     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1455     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1456   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1457     a->donotstash = 1;
1458   } else if (op == MAT_NO_NEW_DIAGONALS) {
1459     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1460   } else if (op == MAT_USE_HASH_TABLE) {
1461     a->ht_flag = 1;
1462   } else {
1463     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNC__
1469 #define __FUNC__ "MatTranspose_MPIBAIJ("
1470 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1471 {
1472   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1473   Mat_SeqBAIJ *Aloc;
1474   Mat        B;
1475   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1476   int        bs=baij->bs,mbs=baij->mbs;
1477   Scalar     *a;
1478 
1479   PetscFunctionBegin;
1480   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1481   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1482 CHKERRQ(ierr);
1483 
1484   /* copy over the A part */
1485   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1486   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1487   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1488 
1489   for ( i=0; i<mbs; i++ ) {
1490     rvals[0] = bs*(baij->rstart + i);
1491     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1492     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1493       col = (baij->cstart+aj[j])*bs;
1494       for (k=0; k<bs; k++ ) {
1495         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1496         col++; a += bs;
1497       }
1498     }
1499   }
1500   /* copy over the B part */
1501   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1502   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1503   for ( i=0; i<mbs; i++ ) {
1504     rvals[0] = bs*(baij->rstart + i);
1505     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1506     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1507       col = baij->garray[aj[j]]*bs;
1508       for (k=0; k<bs; k++ ) {
1509         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1510         col++; a += bs;
1511       }
1512     }
1513   }
1514   PetscFree(rvals);
1515   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1516   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1517 
1518   if (matout != PETSC_NULL) {
1519     *matout = B;
1520   } else {
1521     PetscOps *Abops;
1522     MatOps   Aops;
1523 
1524     /* This isn't really an in-place transpose .... but free data structures from baij */
1525     PetscFree(baij->rowners);
1526     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1527     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1528 #if defined (PETSC_USE_CTABLE)
1529     if (baij->colmap) TableDelete(baij->colmap);
1530 #else
1531     if (baij->colmap) PetscFree(baij->colmap);
1532 #endif
1533     if (baij->garray) PetscFree(baij->garray);
1534     if (baij->lvec) VecDestroy(baij->lvec);
1535     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1536     PetscFree(baij);
1537 
1538     /*
1539        This is horrible, horrible code. We need to keep the
1540       A pointers for the bops and ops but copy everything
1541       else from C.
1542     */
1543     Abops   = A->bops;
1544     Aops    = A->ops;
1545     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1546     A->bops = Abops;
1547     A->ops  = Aops;
1548 
1549     PetscHeaderDestroy(B);
1550   }
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNC__
1555 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1556 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1557 {
1558   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1559   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1560   int ierr,s1,s2,s3;
1561 
1562   PetscFunctionBegin;
1563   if (ll)  {
1564     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1565     ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
1566     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1567     ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr);
1568     ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr);
1569   }
1570   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNC__
1575 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1576 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1577 {
1578   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1579   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1580   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1581   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1582   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1583   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1584   MPI_Comm       comm = A->comm;
1585   MPI_Request    *send_waits,*recv_waits;
1586   MPI_Status     recv_status,*send_status;
1587   IS             istmp;
1588 
1589   PetscFunctionBegin;
1590   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1591   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1592 
1593   /*  first count number of contributors to each processor */
1594   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1595   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1596  procs   = nprocs + size;
1597   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1598   for ( i=0; i<N; i++ ) {
1599     idx = rows[i];
1600     found = 0;
1601     for ( j=0; j<size; j++ ) {
1602       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1603         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1604       }
1605     }
1606     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1607   }
1608   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1609 
1610   /* inform other processors of number of messages and max length*/
1611   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
1612   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1613   nrecvs = work[rank];
1614   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1615   nmax   = work[rank];
1616   PetscFree(work);
1617 
1618   /* post receives:   */
1619   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1620   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1621   for ( i=0; i<nrecvs; i++ ) {
1622     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1623   }
1624 
1625   /* do sends:
1626      1) starts[i] gives the starting index in svalues for stuff going to
1627      the ith processor
1628   */
1629   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1630   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1631   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
1632   starts[0] = 0;
1633   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1634   for ( i=0; i<N; i++ ) {
1635     svalues[starts[owner[i]]++] = rows[i];
1636   }
1637   ISRestoreIndices(is,&rows);
1638 
1639   starts[0] = 0;
1640   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1641   count = 0;
1642   for ( i=0; i<size; i++ ) {
1643     if (procs[i]) {
1644       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1645     }
1646   }
1647   PetscFree(starts);
1648 
1649   base = owners[rank]*bs;
1650 
1651   /*  wait on receives */
1652   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
1653   source = lens + nrecvs;
1654   count  = nrecvs; slen = 0;
1655   while (count) {
1656     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1657     /* unpack receives into our local space */
1658     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1659     source[imdex]  = recv_status.MPI_SOURCE;
1660     lens[imdex]  = n;
1661     slen += n;
1662     count--;
1663   }
1664   PetscFree(recv_waits);
1665 
1666   /* move the data into the send scatter */
1667   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
1668   count = 0;
1669   for ( i=0; i<nrecvs; i++ ) {
1670     values = rvalues + i*nmax;
1671     for ( j=0; j<lens[i]; j++ ) {
1672       lrows[count++] = values[j] - base;
1673     }
1674   }
1675   PetscFree(rvalues); PetscFree(lens);
1676   PetscFree(owner); PetscFree(nprocs);
1677 
1678   /* actually zap the local rows */
1679   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1680   PLogObjectParent(A,istmp);
1681 
1682   /*
1683         Zero the required rows. If the "diagonal block" of the matrix
1684      is square and the user wishes to set the diagonal we use seperate
1685      code so that MatSetValues() is not called for each diagonal allocating
1686      new memory, thus calling lots of mallocs and slowing things down.
1687 
1688        Contributed by: Mathew Knepley
1689   */
1690   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1691   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
1692   if (diag && (l->A->M == l->A->N)) {
1693     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
1694   } else if (diag) {
1695     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1696     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1697       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1698 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1699     }
1700     for ( i = 0; i < slen; i++ ) {
1701       row  = lrows[i] + rstart_bs;
1702       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1703     }
1704     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1705     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1706   } else {
1707     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1708   }
1709 
1710   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1711   PetscFree(lrows);
1712 
1713   /* wait on sends */
1714   if (nsends) {
1715     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1716     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1717     PetscFree(send_status);
1718   }
1719   PetscFree(send_waits); PetscFree(svalues);
1720 
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 #undef __FUNC__
1725 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1726 int MatPrintHelp_MPIBAIJ(Mat A)
1727 {
1728   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1729   MPI_Comm    comm = A->comm;
1730   static int  called = 0;
1731   int         ierr;
1732 
1733   PetscFunctionBegin;
1734   if (!a->rank) {
1735     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1736   }
1737   if (called) {PetscFunctionReturn(0);} else called = 1;
1738   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1739   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNC__
1744 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1745 int MatSetUnfactored_MPIBAIJ(Mat A)
1746 {
1747   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1748   int         ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1756 
1757 #undef __FUNC__
1758 #define __FUNC__ "MatEqual_MPIBAIJ"
1759 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1760 {
1761   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1762   Mat         a, b, c, d;
1763   PetscTruth  flg;
1764   int         ierr;
1765 
1766   PetscFunctionBegin;
1767   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1768   a = matA->A; b = matA->B;
1769   c = matB->A; d = matB->B;
1770 
1771   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1772   if (flg == PETSC_TRUE) {
1773     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1774   }
1775   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 /* -------------------------------------------------------------------*/
1780 static struct _MatOps MatOps_Values = {
1781   MatSetValues_MPIBAIJ,
1782   MatGetRow_MPIBAIJ,
1783   MatRestoreRow_MPIBAIJ,
1784   MatMult_MPIBAIJ,
1785   MatMultAdd_MPIBAIJ,
1786   MatMultTrans_MPIBAIJ,
1787   MatMultTransAdd_MPIBAIJ,
1788   0,
1789   0,
1790   0,
1791   0,
1792   0,
1793   0,
1794   0,
1795   MatTranspose_MPIBAIJ,
1796   MatGetInfo_MPIBAIJ,
1797   MatEqual_MPIBAIJ,
1798   MatGetDiagonal_MPIBAIJ,
1799   MatDiagonalScale_MPIBAIJ,
1800   MatNorm_MPIBAIJ,
1801   MatAssemblyBegin_MPIBAIJ,
1802   MatAssemblyEnd_MPIBAIJ,
1803   0,
1804   MatSetOption_MPIBAIJ,
1805   MatZeroEntries_MPIBAIJ,
1806   MatZeroRows_MPIBAIJ,
1807   0,
1808   0,
1809   0,
1810   0,
1811   MatGetSize_MPIBAIJ,
1812   MatGetLocalSize_MPIBAIJ,
1813   MatGetOwnershipRange_MPIBAIJ,
1814   0,
1815   0,
1816   0,
1817   0,
1818   MatDuplicate_MPIBAIJ,
1819   0,
1820   0,
1821   0,
1822   0,
1823   0,
1824   MatGetSubMatrices_MPIBAIJ,
1825   MatIncreaseOverlap_MPIBAIJ,
1826   MatGetValues_MPIBAIJ,
1827   0,
1828   MatPrintHelp_MPIBAIJ,
1829   MatScale_MPIBAIJ,
1830   0,
1831   0,
1832   0,
1833   MatGetBlockSize_MPIBAIJ,
1834   0,
1835   0,
1836   0,
1837   0,
1838   0,
1839   0,
1840   MatSetUnfactored_MPIBAIJ,
1841   0,
1842   MatSetValuesBlocked_MPIBAIJ,
1843   0,
1844   0,
1845   0,
1846   MatGetMaps_Petsc};
1847 
1848 
1849 EXTERN_C_BEGIN
1850 #undef __FUNC__
1851 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1852 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1853 {
1854   PetscFunctionBegin;
1855   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1856   *iscopy = PETSC_FALSE;
1857   PetscFunctionReturn(0);
1858 }
1859 EXTERN_C_END
1860 
1861 #undef __FUNC__
1862 #define __FUNC__ "MatCreateMPIBAIJ"
1863 /*@C
1864    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1865    (block compressed row).  For good matrix assembly performance
1866    the user should preallocate the matrix storage by setting the parameters
1867    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1868    performance can be increased by more than a factor of 50.
1869 
1870    Collective on MPI_Comm
1871 
1872    Input Parameters:
1873 +  comm - MPI communicator
1874 .  bs   - size of blockk
1875 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1876            This value should be the same as the local size used in creating the
1877            y vector for the matrix-vector product y = Ax.
1878 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1879            This value should be the same as the local size used in creating the
1880            x vector for the matrix-vector product y = Ax.
1881 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1882 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1883 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1884            submatrix  (same for all local rows)
1885 .  d_nnz - array containing the number of block nonzeros in the various block rows
1886            of the in diagonal portion of the local (possibly different for each block
1887            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1888 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1889            submatrix (same for all local rows).
1890 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1891            off-diagonal portion of the local submatrix (possibly different for
1892            each block row) or PETSC_NULL.
1893 
1894    Output Parameter:
1895 .  A - the matrix
1896 
1897    Options Database Keys:
1898 .   -mat_no_unroll - uses code that does not unroll the loops in the
1899                      block calculations (much slower)
1900 .   -mat_block_size - size of the blocks to use
1901 .   -mat_mpi - use the parallel matrix data structures even on one processor
1902                (defaults to using SeqBAIJ format on one processor)
1903 
1904    Notes:
1905    The user MUST specify either the local or global matrix dimensions
1906    (possibly both).
1907 
1908    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1909    than it must be used on all processors that share the object for that argument.
1910 
1911    Storage Information:
1912    For a square global matrix we define each processor's diagonal portion
1913    to be its local rows and the corresponding columns (a square submatrix);
1914    each processor's off-diagonal portion encompasses the remainder of the
1915    local matrix (a rectangular submatrix).
1916 
1917    The user can specify preallocated storage for the diagonal part of
1918    the local submatrix with either d_nz or d_nnz (not both).  Set
1919    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1920    memory allocation.  Likewise, specify preallocated storage for the
1921    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1922 
1923    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1924    the figure below we depict these three local rows and all columns (0-11).
1925 
1926 .vb
1927            0 1 2 3 4 5 6 7 8 9 10 11
1928           -------------------
1929    row 3  |  o o o d d d o o o o o o
1930    row 4  |  o o o d d d o o o o o o
1931    row 5  |  o o o d d d o o o o o o
1932           -------------------
1933 .ve
1934 
1935    Thus, any entries in the d locations are stored in the d (diagonal)
1936    submatrix, and any entries in the o locations are stored in the
1937    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1938    stored simply in the MATSEQBAIJ format for compressed row storage.
1939 
1940    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1941    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1942    In general, for PDE problems in which most nonzeros are near the diagonal,
1943    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1944    or you will get TERRIBLE performance; see the users' manual chapter on
1945    matrices.
1946 
1947    Level: intermediate
1948 
1949 .keywords: matrix, block, aij, compressed row, sparse, parallel
1950 
1951 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1952 @*/
1953 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1954                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1955 {
1956   Mat          B;
1957   Mat_MPIBAIJ  *b;
1958   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1959   int          flag1 = 0,flag2 = 0;
1960 
1961   PetscFunctionBegin;
1962   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1963 
1964   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1965 
1966   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1967   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
1968   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
1969   if (!flag1 && !flag2 && size == 1) {
1970     if (M == PETSC_DECIDE) M = m;
1971     if (N == PETSC_DECIDE) N = n;
1972     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
1973     PetscFunctionReturn(0);
1974   }
1975 
1976   *A = 0;
1977   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
1978   PLogObjectCreate(B);
1979   B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
1980   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
1981   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1982 
1983   B->ops->destroy    = MatDestroy_MPIBAIJ;
1984   B->ops->view       = MatView_MPIBAIJ;
1985   B->mapping    = 0;
1986   B->factor     = 0;
1987   B->assembled  = PETSC_FALSE;
1988 
1989   B->insertmode = NOT_SET_VALUES;
1990   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
1991   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
1992 
1993   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1994     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1995   }
1996   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1997     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1998   }
1999   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2000     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2001   }
2002   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2003   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2004 
2005   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2006     work[0] = m; work[1] = n;
2007     mbs = m/bs; nbs = n/bs;
2008     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
2009     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2010     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2011   }
2012   if (m == PETSC_DECIDE) {
2013     Mbs = M/bs;
2014     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2015     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2016     m   = mbs*bs;
2017   }
2018   if (n == PETSC_DECIDE) {
2019     Nbs = N/bs;
2020     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
2021     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2022     n   = nbs*bs;
2023   }
2024   if (mbs*bs != m || nbs*bs != n) {
2025     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2026   }
2027 
2028   b->m = m; B->m = m;
2029   b->n = n; B->n = n;
2030   b->N = N; B->N = N;
2031   b->M = M; B->M = M;
2032   b->bs  = bs;
2033   b->bs2 = bs*bs;
2034   b->mbs = mbs;
2035   b->nbs = nbs;
2036   b->Mbs = Mbs;
2037   b->Nbs = Nbs;
2038 
2039   /* the information in the maps duplicates the information computed below, eventually
2040      we should remove the duplicate information that is not contained in the maps */
2041   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2042   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2043 
2044   /* build local table of row and column ownerships */
2045   b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2046   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2047   b->cowners    = b->rowners + b->size + 2;
2048   b->rowners_bs = b->cowners + b->size + 2;
2049   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2050   b->rowners[0]    = 0;
2051   for ( i=2; i<=b->size; i++ ) {
2052     b->rowners[i] += b->rowners[i-1];
2053   }
2054   for ( i=0; i<=b->size; i++ ) {
2055     b->rowners_bs[i] = b->rowners[i]*bs;
2056   }
2057   b->rstart    = b->rowners[b->rank];
2058   b->rend      = b->rowners[b->rank+1];
2059   b->rstart_bs = b->rstart * bs;
2060   b->rend_bs   = b->rend * bs;
2061 
2062   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2063   b->cowners[0] = 0;
2064   for ( i=2; i<=b->size; i++ ) {
2065     b->cowners[i] += b->cowners[i-1];
2066   }
2067   b->cstart    = b->cowners[b->rank];
2068   b->cend      = b->cowners[b->rank+1];
2069   b->cstart_bs = b->cstart * bs;
2070   b->cend_bs   = b->cend * bs;
2071 
2072 
2073   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2074   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2075   PLogObjectParent(B,b->A);
2076   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2077   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2078   PLogObjectParent(B,b->B);
2079 
2080   /* build cache for off array entries formed */
2081   ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
2082   ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
2083   b->donotstash  = 0;
2084   b->colmap      = 0;
2085   b->garray      = 0;
2086   b->roworiented = 1;
2087 
2088   /* stuff used in block assembly */
2089   b->barray       = 0;
2090 
2091   /* stuff used for matrix vector multiply */
2092   b->lvec         = 0;
2093   b->Mvctx        = 0;
2094 
2095   /* stuff for MatGetRow() */
2096   b->rowindices   = 0;
2097   b->rowvalues    = 0;
2098   b->getrowactive = PETSC_FALSE;
2099 
2100   /* hash table stuff */
2101   b->ht           = 0;
2102   b->hd           = 0;
2103   b->ht_size      = 0;
2104   b->ht_flag      = 0;
2105   b->ht_fact      = 0;
2106   b->ht_total_ct  = 0;
2107   b->ht_insert_ct = 0;
2108 
2109   *A = B;
2110   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2111   if (flg) {
2112     double fact = 1.39;
2113     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2114     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
2115     if (fact <= 1.0) fact = 1.39;
2116     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2117     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2118   }
2119   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
2120                                      "MatStoreValues_MPIBAIJ",
2121                                      (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2122   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
2123                                      "MatRetrieveValues_MPIBAIJ",
2124                                      (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2125   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
2126                                      "MatGetDiagonalBlock_MPIBAIJ",
2127                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNC__
2132 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2133 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2134 {
2135   Mat         mat;
2136   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2137   int         ierr, len=0, flg;
2138 
2139   PetscFunctionBegin;
2140   *newmat       = 0;
2141   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2142   PLogObjectCreate(mat);
2143   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2144   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2145   mat->ops->destroy = MatDestroy_MPIBAIJ;
2146   mat->ops->view    = MatView_MPIBAIJ;
2147   mat->factor       = matin->factor;
2148   mat->assembled    = PETSC_TRUE;
2149   mat->insertmode   = NOT_SET_VALUES;
2150 
2151   a->m = mat->m   = oldmat->m;
2152   a->n = mat->n   = oldmat->n;
2153   a->M = mat->M   = oldmat->M;
2154   a->N = mat->N   = oldmat->N;
2155 
2156   a->bs  = oldmat->bs;
2157   a->bs2 = oldmat->bs2;
2158   a->mbs = oldmat->mbs;
2159   a->nbs = oldmat->nbs;
2160   a->Mbs = oldmat->Mbs;
2161   a->Nbs = oldmat->Nbs;
2162 
2163   a->rstart       = oldmat->rstart;
2164   a->rend         = oldmat->rend;
2165   a->cstart       = oldmat->cstart;
2166   a->cend         = oldmat->cend;
2167   a->size         = oldmat->size;
2168   a->rank         = oldmat->rank;
2169   a->donotstash   = oldmat->donotstash;
2170   a->roworiented  = oldmat->roworiented;
2171   a->rowindices   = 0;
2172   a->rowvalues    = 0;
2173   a->getrowactive = PETSC_FALSE;
2174   a->barray       = 0;
2175   a->rstart_bs    = oldmat->rstart_bs;
2176   a->rend_bs      = oldmat->rend_bs;
2177   a->cstart_bs    = oldmat->cstart_bs;
2178   a->cend_bs      = oldmat->cend_bs;
2179 
2180   /* hash table stuff */
2181   a->ht           = 0;
2182   a->hd           = 0;
2183   a->ht_size      = 0;
2184   a->ht_flag      = oldmat->ht_flag;
2185   a->ht_fact      = oldmat->ht_fact;
2186   a->ht_total_ct  = 0;
2187   a->ht_insert_ct = 0;
2188 
2189 
2190   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2191   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2192   a->cowners    = a->rowners + a->size + 2;
2193   a->rowners_bs = a->cowners + a->size + 2;
2194   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2195   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2196   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2197   if (oldmat->colmap) {
2198 #if defined (PETSC_USE_CTABLE)
2199   ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2200 #else
2201     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2202     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2203     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2204 #endif
2205   } else a->colmap = 0;
2206   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2207     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2208     PLogObjectMemory(mat,len*sizeof(int));
2209     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2210   } else a->garray = 0;
2211 
2212   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2213   PLogObjectParent(mat,a->lvec);
2214   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2215 
2216   PLogObjectParent(mat,a->Mvctx);
2217   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2218   PLogObjectParent(mat,a->A);
2219   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2220   PLogObjectParent(mat,a->B);
2221   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2222   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2223   if (flg) {
2224     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2225   }
2226   *newmat = mat;
2227   PetscFunctionReturn(0);
2228 }
2229 
2230 #include "sys.h"
2231 
2232 #undef __FUNC__
2233 #define __FUNC__ "MatLoad_MPIBAIJ"
2234 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2235 {
2236   Mat          A;
2237   int          i, nz, ierr, j,rstart, rend, fd;
2238   Scalar       *vals,*buf;
2239   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2240   MPI_Status   status;
2241   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2242   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2243   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2244   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2245   int          dcount,kmax,k,nzcount,tmp;
2246 
2247   PetscFunctionBegin;
2248   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2249 
2250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2251   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2252   if (!rank) {
2253     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2254     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2255     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2256     if (header[3] < 0) {
2257       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2258     }
2259   }
2260 
2261   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2262   M = header[1]; N = header[2];
2263 
2264   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2265 
2266   /*
2267      This code adds extra rows to make sure the number of rows is
2268      divisible by the blocksize
2269   */
2270   Mbs        = M/bs;
2271   extra_rows = bs - M + bs*(Mbs);
2272   if (extra_rows == bs) extra_rows = 0;
2273   else                  Mbs++;
2274   if (extra_rows &&!rank) {
2275     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2276   }
2277 
2278   /* determine ownership of all rows */
2279   mbs = Mbs/size + ((Mbs % size) > rank);
2280   m   = mbs * bs;
2281   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2282   browners = rowners + size + 1;
2283   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2284   rowners[0] = 0;
2285   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2286   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2287   rstart = rowners[rank];
2288   rend   = rowners[rank+1];
2289 
2290   /* distribute row lengths to all processors */
2291   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
2292   if (!rank) {
2293     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
2294     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2295     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2296     sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
2297     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2298     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2299     PetscFree(sndcounts);
2300   } else {
2301     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2302   }
2303 
2304   if (!rank) {
2305     /* calculate the number of nonzeros on each processor */
2306     procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
2307     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2308     for ( i=0; i<size; i++ ) {
2309       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2310         procsnz[i] += rowlengths[j];
2311       }
2312     }
2313     PetscFree(rowlengths);
2314 
2315     /* determine max buffer needed and allocate it */
2316     maxnz = 0;
2317     for ( i=0; i<size; i++ ) {
2318       maxnz = PetscMax(maxnz,procsnz[i]);
2319     }
2320     cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);
2321 
2322     /* read in my part of the matrix column indices  */
2323     nz = procsnz[0];
2324     ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2325     mycols = ibuf;
2326     if (size == 1)  nz -= extra_rows;
2327     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2328     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2329 
2330     /* read in every ones (except the last) and ship off */
2331     for ( i=1; i<size-1; i++ ) {
2332       nz   = procsnz[i];
2333       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2334       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2335     }
2336     /* read in the stuff for the last proc */
2337     if ( size != 1 ) {
2338       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2339       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2340       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2341       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2342     }
2343     PetscFree(cols);
2344   } else {
2345     /* determine buffer space needed for message */
2346     nz = 0;
2347     for ( i=0; i<m; i++ ) {
2348       nz += locrowlens[i];
2349     }
2350     ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
2351     mycols = ibuf;
2352     /* receive message of column indices*/
2353     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2354     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2355     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2356   }
2357 
2358   /* loop over local rows, determining number of off diagonal entries */
2359   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
2360   odlens = dlens + (rend-rstart);
2361   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
2362   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2363   masked1 = mask    + Mbs;
2364   masked2 = masked1 + Mbs;
2365   rowcount = 0; nzcount = 0;
2366   for ( i=0; i<mbs; i++ ) {
2367     dcount  = 0;
2368     odcount = 0;
2369     for ( j=0; j<bs; j++ ) {
2370       kmax = locrowlens[rowcount];
2371       for ( k=0; k<kmax; k++ ) {
2372         tmp = mycols[nzcount++]/bs;
2373         if (!mask[tmp]) {
2374           mask[tmp] = 1;
2375           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2376           else masked1[dcount++] = tmp;
2377         }
2378       }
2379       rowcount++;
2380     }
2381 
2382     dlens[i]  = dcount;
2383     odlens[i] = odcount;
2384 
2385     /* zero out the mask elements we set */
2386     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2387     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2388   }
2389 
2390   /* create our matrix */
2391   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2392   A = *newmat;
2393   MatSetOption(A,MAT_COLUMNS_SORTED);
2394 
2395   if (!rank) {
2396     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
2397     /* read in my part of the matrix numerical values  */
2398     nz = procsnz[0];
2399     vals = buf;
2400     mycols = ibuf;
2401     if (size == 1)  nz -= extra_rows;
2402     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2403     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2404 
2405     /* insert into matrix */
2406     jj      = rstart*bs;
2407     for ( i=0; i<m; i++ ) {
2408       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2409       mycols += locrowlens[i];
2410       vals   += locrowlens[i];
2411       jj++;
2412     }
2413     /* read in other processors (except the last one) and ship out */
2414     for ( i=1; i<size-1; i++ ) {
2415       nz   = procsnz[i];
2416       vals = buf;
2417       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2418       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2419     }
2420     /* the last proc */
2421     if ( size != 1 ){
2422       nz   = procsnz[i] - extra_rows;
2423       vals = buf;
2424       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2425       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2426       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2427     }
2428     PetscFree(procsnz);
2429   } else {
2430     /* receive numeric values */
2431     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);
2432 
2433     /* receive message of values*/
2434     vals   = buf;
2435     mycols = ibuf;
2436     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2437     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2438     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2439 
2440     /* insert into matrix */
2441     jj      = rstart*bs;
2442     for ( i=0; i<m; i++ ) {
2443       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2444       mycols += locrowlens[i];
2445       vals   += locrowlens[i];
2446       jj++;
2447     }
2448   }
2449   PetscFree(locrowlens);
2450   PetscFree(buf);
2451   PetscFree(ibuf);
2452   PetscFree(rowners);
2453   PetscFree(dlens);
2454   PetscFree(mask);
2455   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2456   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 
2461 
2462 #undef __FUNC__
2463 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2464 /*@
2465    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2466 
2467    Input Parameters:
2468 .  mat  - the matrix
2469 .  fact - factor
2470 
2471    Collective on Mat
2472 
2473    Level: advanced
2474 
2475   Notes:
2476    This can also be set by the command line option: -mat_use_hash_table fact
2477 
2478 .keywords: matrix, hashtable, factor, HT
2479 
2480 .seealso: MatSetOption()
2481 @*/
2482 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2483 {
2484   Mat_MPIBAIJ *baij;
2485 
2486   PetscFunctionBegin;
2487   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2488   if (mat->type != MATMPIBAIJ) {
2489       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2490   }
2491   baij = (Mat_MPIBAIJ*) mat->data;
2492   baij->ht_fact = fact;
2493   PetscFunctionReturn(0);
2494 }
2495