xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a84b093a86b993cdc13fc7a2e464c26044d2eea1)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.171 1999/05/12 03:29:54 bsmith Exp balay $";
3 #endif
4 
5 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6 #include "src/vec/vecimpl.h"
7 
8 extern int MatSetUpMultiply_MPIBAIJ(Mat);
9 extern int DisAssemble_MPIBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17 extern int MatPrintHelp_SeqBAIJ(Mat);
18 
19 EXTERN_C_BEGIN
20 #undef __FUNC__
21 #define __FUNC__ "MatStoreValues_MPIBAIJ"
22 int MatStoreValues_MPIBAIJ(Mat mat)
23 {
24   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
25   int         ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
29   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 EXTERN_C_END
33 
34 EXTERN_C_BEGIN
35 #undef __FUNC__
36 #define __FUNC__ "MatRetrieveValues_MPIBAIJ"
37 int MatRetrieveValues_MPIBAIJ(Mat mat)
38 {
39   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
40   int         ierr;
41 
42   PetscFunctionBegin;
43   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
44   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
45   PetscFunctionReturn(0);
46 }
47 EXTERN_C_END
48 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* sparse variant: hash table keyed by global block column + 1; both key
     and value are shifted by 1 so that 0 can mean "not present" */
  ierr = TableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ){
    ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: one int per global block column; entry holds
     (local block column)*bs + 1, and 0 marks an absent column */
  baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
77 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv):
     inserts (INSERT_VALUES) or adds (ADD_VALUES) one POINT value into the
     "diagonal" sequential block A of the parallel matrix.  row/col are
     point indices local to this process.  The caller must have set up the
     aliases used free-form here: bs, bs2 and a/aa/ai/aj/aimax/ailen plus
     the scratch variables (brow,bcol,ridx,cidx,low,high,t,_i,ii,N,nrow,
     rmax,rp,ap,bap,ierr) -- see MatSetValues_MPIBAIJ below.
     A bounded binary search followed by a linear scan locates the block
     column; if the entry is absent and a->nonew permits, the row storage
     is enlarged in CHUNKSIZE-block increments and later entries shifted.
   NOTE(review): a->nz is incremented only inside the reallocation branch,
   not for every newly inserted block -- confirm whether nz is intended to
   track the nonzero block count on this code path.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                          len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
154 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv):
     identical in structure to MatSetValues_SeqBAIJ_A_Private above, but
     operates on the "off-diagonal" sequential block B (aliases b/ba/bi/
     bj/bimax/bilen, logging against B).  See the A variant for the full
     description of the caller-supplied variables and the insertion /
     reallocation logic.
   NOTE(review): as in the A variant, b->nz is only incremented in the
   reallocation branch -- confirm that this is intentional.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                           len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
229 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts or adds an m-by-n logically dense block
   of values, addressed by POINT (not block) row/column indices, into the
   parallel block AIJ matrix.

   Locally owned rows are written straight into the sequential "diagonal"
   block A or "off-diagonal" block B through the macros above (which is
   why the many a*/b* aliases are declared here); rows owned by other
   processors are stashed and communicated during MatAssemblyBegin/End.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue; /* negative row indices are silently skipped */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned by this processor */
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap entries are (local block column)*bs + 1; 0 means absent */
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(baij->colmap, in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            /* NOTE(review): for an absent column this yields
               col = -1 + in[j]%bs, which is negative only when
               in[j]%bs == 0; for bs > 1 a genuinely new column can slip
               past the col < 0 test below -- verify */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* a new off-process column after assembly: revert to global
                 indexing in B until the next assembly */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned elsewhere: stash the values for MatAssemblyBegin/End */
      if ( !baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
312 
313 #undef __FUNC__
314 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
315 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
316 {
317   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
318   Scalar      *value,*barray=baij->barray;
319   int         ierr,i,j,ii,jj,row,col;
320   int         roworiented = baij->roworiented,rstart=baij->rstart ;
321   int         rend=baij->rend,cstart=baij->cstart,stepval;
322   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
323 
324   if(!barray) {
325     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar));CHKPTRQ(barray);
326   }
327 
328   if (roworiented) {
329     stepval = (n-1)*bs;
330   } else {
331     stepval = (m-1)*bs;
332   }
333   for ( i=0; i<m; i++ ) {
334     if (im[i] < 0) continue;
335 #if defined(PETSC_USE_BOPT_g)
336     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
337 #endif
338     if (im[i] >= rstart && im[i] < rend) {
339       row = im[i] - rstart;
340       for ( j=0; j<n; j++ ) {
341         /* If NumCol = 1 then a copy is not required */
342         if ((roworiented) && (n == 1)) {
343           barray = v + i*bs2;
344         } else if((!roworiented) && (m == 1)) {
345           barray = v + j*bs2;
346         } else { /* Here a copy is required */
347           if (roworiented) {
348             value = v + i*(stepval+bs)*bs + j*bs;
349           } else {
350             value = v + j*(stepval+bs)*bs + i*bs;
351           }
352           for ( ii=0; ii<bs; ii++,value+=stepval ) {
353             for (jj=0; jj<bs; jj++ ) {
354               *barray++  = *value++;
355             }
356           }
357           barray -=bs2;
358         }
359 
360         if (in[j] >= cstart && in[j] < cend){
361           col  = in[j] - cstart;
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364         else if (in[j] < 0) continue;
365 #if defined(PETSC_USE_BOPT_g)
366         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
367 #endif
368         else {
369           if (mat->was_assembled) {
370             if (!baij->colmap) {
371               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
372             }
373 
374 #if defined(PETSC_USE_BOPT_g)
375 #if defined (PETSC_USE_CTABLE)
376             { int data;
377             ierr = TableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
378             if((data - 1) % bs)
379 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
380             }
381 #else
382             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
383 #endif
384 #endif
385 #if defined (PETSC_USE_CTABLE)
386 	    ierr = TableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
387             col  = (col - 1)/bs;
388 #else
389             col = (baij->colmap[in[j]] - 1)/bs;
390 #endif
391             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394             }
395           }
396           else col = in[j];
397           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
398         }
399       }
400     } else {
401       if (!baij->donotstash) {
402         if (roworiented) {
403           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
404         } else {
405           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
406         }
407       }
408     }
409   }
410   PetscFunctionReturn(0);
411 }
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* Multiplicative hash using Knuth's golden-ratio constant; tmp is a
   caller-supplied double scratch variable */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ,
   addressed by POINT indices.  Every target block must already exist in
   the table built by MatCreateHashTable_MPIBAIJ_Private(); no new
   nonzero blocks can be introduced here.  Off-process rows are stashed
   exactly as in the regular routine.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* search statistics */
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1; /* 1-based so key 0 marks an empty slot */
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probing: scan forward to the table end, then wrap to 0 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probing: scan forward to the table end, then wrap to 0 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
492 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of
   MatSetValuesBlocked_MPIBAIJ, addressed by BLOCK indices.  Each bs-by-bs
   block is located through the table built by
   MatCreateHashTable_MPIBAIJ_Private(); no new nonzero blocks can be
   introduced.  Off-process block rows are stashed in mat->bstash.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* search statistics */
#endif

  PetscFunctionBegin;

  /* stride from the end of one row of a block to the start of the next */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    /* NOTE(review): v_t = v + i*bs2 disagrees with the commented-out
       v + i*(stepval+bs)*bs + j*bs formula below when n > 1 -- confirm
       the intended input layout for the row-oriented multi-column case */
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1; /* 1-based so key 0 marks an empty slot */
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probing: scan forward to the table end, then wrap to 0 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probing: scan forward to the table end, then wrap to 0 */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];  /* start of the bs2 stored values of this block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            /* jj runs down a storage column while value runs along an input row */
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
606 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves an m-by-n logically dense array of
   entries, addressed by POINT (not block) indices, into v (row-major).
   Only locally owned rows may be queried; a column that is not stored
   locally yields 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* entry lies in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* entry may lie in the off-diagonal block B */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap stores (local block column)*bs + 1; 0 means absent */
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* cross-check against garray to catch stale/absent columns */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
650 
651 #undef __FUNC__
652 #define __FUNC__ "MatNorm_MPIBAIJ"
653 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
654 {
655   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
656   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
657   int        ierr, i,bs2=baij->bs2;
658   double     sum = 0.0;
659   Scalar     *v;
660 
661   PetscFunctionBegin;
662   if (baij->size == 1) {
663     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
664   } else {
665     if (type == NORM_FROBENIUS) {
666       v = amat->a;
667       for (i=0; i<amat->nz*bs2; i++ ) {
668 #if defined(PETSC_USE_COMPLEX)
669         sum += PetscReal(PetscConj(*v)*(*v)); v++;
670 #else
671         sum += (*v)*(*v); v++;
672 #endif
673       }
674       v = bmat->a;
675       for (i=0; i<bmat->nz*bs2; i++ ) {
676 #if defined(PETSC_USE_COMPLEX)
677         sum += PetscReal(PetscConj(*v)*(*v)); v++;
678 #else
679         sum += (*v)*(*v); v++;
680 #endif
681       }
682       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
683       *norm = sqrt(*norm);
684     } else {
685       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
686     }
687   }
688   PetscFunctionReturn(0);
689 }
690 
691 
692 /*
693   Creates the hash table, and sets the table
694   This table is created only once.
695   If new entried need to be added to the matrix
696   then the hash table has to be destroyed and
697   recreated.
698 */
699 #undef __FUNC__
700 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
701 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
702 {
703   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
704   Mat         A = baij->A, B=baij->B;
705   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
706   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
707   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
708   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
709   int         *HT,key;
710   Scalar      **HD;
711   double      tmp;
712 #if defined(PETSC_USE_BOPT_g)
713   int         ct=0,max=0;
714 #endif
715 
716   PetscFunctionBegin;
717   baij->ht_size=(int)(factor*nz);
718   size = baij->ht_size;
719 
720   if (baij->ht) {
721     PetscFunctionReturn(0);
722   }
723 
724   /* Allocate Memory for Hash Table */
725   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1);CHKPTRQ(baij->hd);
726   baij->ht = (int*)(baij->hd + size);
727   HD = baij->hd;
728   HT = baij->ht;
729 
730 
731   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
732 
733 
734   /* Loop Over A */
735   for ( i=0; i<a->mbs; i++ ) {
736     for ( j=ai[i]; j<ai[i+1]; j++ ) {
737       row = i+rstart;
738       col = aj[j]+cstart;
739 
740       key = row*Nbs + col + 1;
741       h1  = HASH(size,key,tmp);
742       for ( k=0; k<size; k++ ){
743         if (HT[(h1+k)%size] == 0.0) {
744           HT[(h1+k)%size] = key;
745           HD[(h1+k)%size] = a->a + j*bs2;
746           break;
747 #if defined(PETSC_USE_BOPT_g)
748         } else {
749           ct++;
750 #endif
751         }
752       }
753 #if defined(PETSC_USE_BOPT_g)
754       if (k> max) max = k;
755 #endif
756     }
757   }
758   /* Loop Over B */
759   for ( i=0; i<b->mbs; i++ ) {
760     for ( j=bi[i]; j<bi[i+1]; j++ ) {
761       row = i+rstart;
762       col = garray[bj[j]];
763       key = row*Nbs + col + 1;
764       h1  = HASH(size,key,tmp);
765       for ( k=0; k<size; k++ ){
766         if (HT[(h1+k)%size] == 0.0) {
767           HT[(h1+k)%size] = key;
768           HD[(h1+k)%size] = b->a + j*bs2;
769           break;
770 #if defined(PETSC_USE_BOPT_g)
771         } else {
772           ct++;
773 #endif
774         }
775       }
776 #if defined(PETSC_USE_BOPT_g)
777       if (k> max) max = k;
778 #endif
779     }
780   }
781 
782   /* Print Summary */
783 #if defined(PETSC_USE_BOPT_g)
784   for ( i=0,j=0; i<size; i++)
785     if (HT[i]) {j++;}
786   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
787            (j== 0)? 0.0:((double)(ct+j))/j,max);
788 #endif
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNC__
793 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
794 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
795 {
796   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
797   int         ierr,nstash,reallocs;
798   InsertMode  addv;
799 
800   PetscFunctionBegin;
801   if (baij->donotstash) {
802     PetscFunctionReturn(0);
803   }
804 
805   /* make sure all processors are either in INSERTMODE or ADDMODE */
806   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) {
808     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
809   }
810   mat->insertmode = addv; /* in case this processor had no cache */
811 
812   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
813   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
815   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
816   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
817   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
818   PetscFunctionReturn(0);
819 }
820 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly of a parallel BAIJ matrix.

   Drains the point stash and the block stash of values destined for this
   processor and inserts them locally, then assembles the diagonal (A) and
   off-diagonal (B) sequential blocks.  Also performs the collective
   disassembly check and, on first final assembly, sets up the scatter
   context and (optionally) the hash table used by the _HT set-values
   routines.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled,r1,r2,r3;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each message is a list of (row,col,val) entries */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = 0;
    a->roworiented    = 0;
    b->roworiented    = 0;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block holds bs2 values, hence the i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags: 0 result means someone disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the communication scatter for MatMult etc. */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset hash-table search statistics gathered in MatSetValues */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested (and not yet built), create the hash table and switch the
     set-values entry points to the hash-table-based versions */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* MatGetRow workspace is sized for the pre-assembly structure; discard it */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
916 
917 #undef __FUNC__
918 #define __FUNC__ "MatView_MPIBAIJ_Binary"
919 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
920 {
921   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
922   int          ierr;
923 
924   PetscFunctionBegin;
925   if (baij->size == 1) {
926     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
927   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
928   PetscFunctionReturn(0);
929 }
930 
#undef __FUNC__
#define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views a parallel BAIJ matrix with
   an ASCII, draw, or socket viewer.

   For the ASCII "info" formats only summary statistics are printed.
   Otherwise, on more than one processor, the entire matrix is gathered
   onto processor 0 and viewed there (not scalable; intended for
   debugging/small problems).
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
  int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
  FILE         *fd;
  ViewerType   vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* print per-processor nonzero statistics, one processor at a time */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscSequentialPhaseBegin(mat->comm,1);CHKERRQ(ierr);
      fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
      fflush(fd);
      ierr = PetscSequentialPhaseEnd(mat->comm,1);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = PetscPrintf(mat->comm,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    /* nothing to do if the draw context is the null device */
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
    int         mbs = baij->mbs;
    Scalar      *a;

    /* processor 0 owns all rows of the gathered copy; others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*) baij->A->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for ( i=0; i<mbs; i++ ) {
      /* rvals[] holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++ ) {
          /* insert one point column of the bs x bs block per call */
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*) baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for ( i=0; i<mbs; i++ ) {
      rvals[0] = bs*(baij->rstart + i);
      for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
      for ( j=ai[i]; j<ai[i+1]; j++ ) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++ ) {
          ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    PetscFree(rvals);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1035 
1036 
1037 
1038 #undef __FUNC__
1039 #define __FUNC__ "MatView_MPIBAIJ"
1040 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1041 {
1042   int         ierr;
1043   ViewerType  vtype;
1044 
1045   PetscFunctionBegin;
1046   ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
1047   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
1048       PetscTypeCompare(vtype,SOCKET_VIEWER)|| PetscTypeCompare(vtype,BINARY_VIEWER)) {
1049     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1050     /*
1051   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
1052     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1053     */
1054   } else {
1055     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Releases all storage owned by a parallel BAIJ
   matrix: mappings, maps, stashes, the sequential A and B blocks, the
   column map, scatter context, and assorted work arrays, then the header.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  /* only destroy when the last reference is released */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is a Table or a plain array depending on the build option */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) TableDelete(baij->colmap);
#else
  if (baij->colmap) PetscFree(baij->colmap);
#endif
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1108 
1109 #undef __FUNC__
1110 #define __FUNC__ "MatMult_MPIBAIJ"
1111 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1112 {
1113   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1114   int         ierr, nt;
1115 
1116   PetscFunctionBegin;
1117   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1118   if (nt != a->n) {
1119     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1120   }
1121   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1122   if (nt != a->m) {
1123     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1124   }
1125   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1126   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1127   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1128   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1129   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNC__
1134 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1135 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1136 {
1137   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1138   int        ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1142   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1143   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1144   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNC__
1149 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1150 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1151 {
1152   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1153   int         ierr;
1154 
1155   PetscFunctionBegin;
1156   /* do nondiagonal part */
1157   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
1158   /* send it on its way */
1159   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1160   /* do local part */
1161   ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
1162   /* receive remote parts: note this assumes the values are not actually */
1163   /* inserted in yy until the next line, which is true for my implementation*/
1164   /* but is not perhaps always true. */
1165   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIBAIJ"
/*
   MatMultTransAdd_MPIBAIJ - Computes zz = A' * xx + yy, overlapping the
   reverse scatter of off-diagonal contributions with the local product.
*/
int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part: zz = A_diag' * xx + yy */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1189 
1190 /*
1191   This only works correctly for square matrices where the subblock A->A is the
1192    diagonal block
1193 */
1194 #undef __FUNC__
1195 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1196 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1197 {
1198   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1199   int         ierr;
1200 
1201   PetscFunctionBegin;
1202   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1203   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNC__
1208 #define __FUNC__ "MatScale_MPIBAIJ"
1209 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1210 {
1211   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1212   int         ierr;
1213 
1214   PetscFunctionBegin;
1215   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1216   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 #undef __FUNC__
1221 #define __FUNC__ "MatGetSize_MPIBAIJ"
1222 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1223 {
1224   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1225 
1226   PetscFunctionBegin;
1227   if (m) *m = mat->M;
1228   if (n) *n = mat->N;
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNC__
1233 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1234 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1235 {
1236   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1237 
1238   PetscFunctionBegin;
1239   *m = mat->m; *n = mat->n;
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNC__
1244 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1245 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1246 {
1247   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1248 
1249   PetscFunctionBegin;
1250   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1251   PetscFunctionReturn(0);
1252 }
1253 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one (local) row of the matrix in global
   point-column numbering, merging the row pieces stored in the diagonal
   (A) and off-diagonal (B) sequential blocks into increasing column
   order.  Only rows owned by this processor may be requested; the row
   remains "active" until MatRestoreRow is called.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* total (block) nonzeros of block row i across both parts */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* single allocation holds both the values and the index workspace */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request only the pieces (columns and/or values) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column before cstart come first */
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining (right-of-diagonal) B entries */
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying the values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values not requested: locate the split point here */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A columns are local; shift by cstart*bs to get global numbering */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1334 
1335 #undef __FUNC__
1336 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1337 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1338 {
1339   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1340 
1341   PetscFunctionBegin;
1342   if (baij->getrowactive == PETSC_FALSE) {
1343     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1344   }
1345   baij->getrowactive = PETSC_FALSE;
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNC__
1350 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1351 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1352 {
1353   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1354 
1355   PetscFunctionBegin;
1356   *bs = baij->bs;
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNC__
1361 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1362 int MatZeroEntries_MPIBAIJ(Mat A)
1363 {
1364   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1365   int         ierr;
1366 
1367   PetscFunctionBegin;
1368   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1369   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNC__
1374 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1375 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1376 {
1377   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1378   Mat         A = a->A, B = a->B;
1379   int         ierr;
1380   double      isend[5], irecv[5];
1381 
1382   PetscFunctionBegin;
1383   info->block_size     = (double)a->bs;
1384   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1385   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1386   isend[3] = info->memory;  isend[4] = info->mallocs;
1387   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1388   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1389   isend[3] += info->memory;  isend[4] += info->mallocs;
1390   if (flag == MAT_LOCAL) {
1391     info->nz_used      = isend[0];
1392     info->nz_allocated = isend[1];
1393     info->nz_unneeded  = isend[2];
1394     info->memory       = isend[3];
1395     info->mallocs      = isend[4];
1396   } else if (flag == MAT_GLOBAL_MAX) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else if (flag == MAT_GLOBAL_SUM) {
1404     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1405     info->nz_used      = irecv[0];
1406     info->nz_allocated = irecv[1];
1407     info->nz_unneeded  = irecv[2];
1408     info->memory       = irecv[3];
1409     info->mallocs      = irecv[4];
1410   } else {
1411     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1412   }
1413   info->rows_global       = (double)a->M;
1414   info->columns_global    = (double)a->N;
1415   info->rows_local        = (double)a->m;
1416   info->columns_local     = (double)a->N;
1417   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1418   info->fill_ratio_needed = 0;
1419   info->factor_mallocs    = 0;
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNC__
1424 #define __FUNC__ "MatSetOption_MPIBAIJ"
1425 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1426 {
1427   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1428   int         ierr;
1429 
1430   PetscFunctionBegin;
1431   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1432       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1433       op == MAT_COLUMNS_UNSORTED ||
1434       op == MAT_COLUMNS_SORTED ||
1435       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1436       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1437         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1438         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1439   } else if (op == MAT_ROW_ORIENTED) {
1440         a->roworiented = 1;
1441         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1442         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1443   } else if (op == MAT_ROWS_SORTED ||
1444              op == MAT_ROWS_UNSORTED ||
1445              op == MAT_SYMMETRIC ||
1446              op == MAT_STRUCTURALLY_SYMMETRIC ||
1447              op == MAT_YES_NEW_DIAGONALS ||
1448              op == MAT_USE_HASH_TABLE) {
1449     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1450   } else if (op == MAT_COLUMN_ORIENTED) {
1451     a->roworiented = 0;
1452     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1453     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1454   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1455     a->donotstash = 1;
1456   } else if (op == MAT_NO_NEW_DIAGONALS) {
1457     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1458   } else if (op == MAT_USE_HASH_TABLE) {
1459     a->ht_flag = 1;
1460   } else {
1461     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNC__
1467 #define __FUNC__ "MatTranspose_MPIBAIJ("
1468 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1469 {
1470   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1471   Mat_SeqBAIJ *Aloc;
1472   Mat        B;
1473   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1474   int        bs=baij->bs,mbs=baij->mbs;
1475   Scalar     *a;
1476 
1477   PetscFunctionBegin;
1478   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1479   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1480 CHKERRQ(ierr);
1481 
1482   /* copy over the A part */
1483   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1484   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1485   rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1486 
1487   for ( i=0; i<mbs; i++ ) {
1488     rvals[0] = bs*(baij->rstart + i);
1489     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1490     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1491       col = (baij->cstart+aj[j])*bs;
1492       for (k=0; k<bs; k++ ) {
1493         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1494         col++; a += bs;
1495       }
1496     }
1497   }
1498   /* copy over the B part */
1499   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1500   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1501   for ( i=0; i<mbs; i++ ) {
1502     rvals[0] = bs*(baij->rstart + i);
1503     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1504     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1505       col = baij->garray[aj[j]]*bs;
1506       for (k=0; k<bs; k++ ) {
1507         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1508         col++; a += bs;
1509       }
1510     }
1511   }
1512   PetscFree(rvals);
1513   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1514   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1515 
1516   if (matout != PETSC_NULL) {
1517     *matout = B;
1518   } else {
1519     PetscOps *Abops;
1520     MatOps   Aops;
1521 
1522     /* This isn't really an in-place transpose .... but free data structures from baij */
1523     PetscFree(baij->rowners);
1524     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1525     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1526 #if defined (PETSC_USE_CTABLE)
1527     if (baij->colmap) TableDelete(baij->colmap);
1528 #else
1529     if (baij->colmap) PetscFree(baij->colmap);
1530 #endif
1531     if (baij->garray) PetscFree(baij->garray);
1532     if (baij->lvec) VecDestroy(baij->lvec);
1533     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1534     PetscFree(baij);
1535 
1536     /*
1537        This is horrible, horrible code. We need to keep the
1538       A pointers for the bops and ops but copy everything
1539       else from C.
1540     */
1541     Abops   = A->bops;
1542     Aops    = A->ops;
1543     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1544     A->bops = Abops;
1545     A->ops  = Aops;
1546 
1547     PetscHeaderDestroy(B);
1548   }
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 #undef __FUNC__
1553 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1554 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1555 {
1556   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1557   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1558   int ierr,s1,s2,s3;
1559 
1560   PetscFunctionBegin;
1561   if (ll)  {
1562     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1563     ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
1564     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1565     ierr = MatDiagonalScale(a,ll,0);CHKERRQ(ierr);
1566     ierr = MatDiagonalScale(b,ll,0);CHKERRQ(ierr);
1567   }
1568   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 #undef __FUNC__
1573 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1574 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1575 {
1576   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1577   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1578   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1579   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1580   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1581   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1582   MPI_Comm       comm = A->comm;
1583   MPI_Request    *send_waits,*recv_waits;
1584   MPI_Status     recv_status,*send_status;
1585   IS             istmp;
1586 
1587   PetscFunctionBegin;
1588   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1589   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1590 
1591   /*  first count number of contributors to each processor */
1592   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
1593   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1594  procs   = nprocs + size;
1595   owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1596   for ( i=0; i<N; i++ ) {
1597     idx = rows[i];
1598     found = 0;
1599     for ( j=0; j<size; j++ ) {
1600       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1601         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1602       }
1603     }
1604     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1605   }
1606   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1607 
1608   /* inform other processors of number of messages and max length*/
1609   work   = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work);
1610   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1611   nrecvs = work[rank];
1612   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1613   nmax   = work[rank];
1614   PetscFree(work);
1615 
1616   /* post receives:   */
1617   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1618   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1619   for ( i=0; i<nrecvs; i++ ) {
1620     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1621   }
1622 
1623   /* do sends:
1624      1) starts[i] gives the starting index in svalues for stuff going to
1625      the ith processor
1626   */
1627   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
1628   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1629   starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
1630   starts[0] = 0;
1631   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1632   for ( i=0; i<N; i++ ) {
1633     svalues[starts[owner[i]]++] = rows[i];
1634   }
1635   ISRestoreIndices(is,&rows);
1636 
1637   starts[0] = 0;
1638   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1639   count = 0;
1640   for ( i=0; i<size; i++ ) {
1641     if (procs[i]) {
1642       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1643     }
1644   }
1645   PetscFree(starts);
1646 
1647   base = owners[rank]*bs;
1648 
1649   /*  wait on receives */
1650   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
1651   source = lens + nrecvs;
1652   count  = nrecvs; slen = 0;
1653   while (count) {
1654     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1655     /* unpack receives into our local space */
1656     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1657     source[imdex]  = recv_status.MPI_SOURCE;
1658     lens[imdex]  = n;
1659     slen += n;
1660     count--;
1661   }
1662   PetscFree(recv_waits);
1663 
1664   /* move the data into the send scatter */
1665   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
1666   count = 0;
1667   for ( i=0; i<nrecvs; i++ ) {
1668     values = rvalues + i*nmax;
1669     for ( j=0; j<lens[i]; j++ ) {
1670       lrows[count++] = values[j] - base;
1671     }
1672   }
1673   PetscFree(rvalues); PetscFree(lens);
1674   PetscFree(owner); PetscFree(nprocs);
1675 
1676   /* actually zap the local rows */
1677   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1678   PLogObjectParent(A,istmp);
1679 
1680   /*
1681         Zero the required rows. If the "diagonal block" of the matrix
1682      is square and the user wishes to set the diagonal we use seperate
1683      code so that MatSetValues() is not called for each diagonal allocating
1684      new memory, thus calling lots of mallocs and slowing things down.
1685 
1686        Contributed by: Mathew Knepley
1687   */
1688   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1689   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
1690   if (diag && (l->A->M == l->A->N)) {
1691     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
1692   } else if (diag) {
1693     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1694     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1695       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1696 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1697     }
1698     for ( i = 0; i < slen; i++ ) {
1699       row  = lrows[i] + rstart_bs;
1700       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1701     }
1702     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1703     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1704   } else {
1705     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
1706   }
1707 
1708   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1709   PetscFree(lrows);
1710 
1711   /* wait on sends */
1712   if (nsends) {
1713     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1714     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1715     PetscFree(send_status);
1716   }
1717   PetscFree(send_waits); PetscFree(svalues);
1718 
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 #undef __FUNC__
1723 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1724 int MatPrintHelp_MPIBAIJ(Mat A)
1725 {
1726   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1727   MPI_Comm    comm = A->comm;
1728   static int  called = 0;
1729   int         ierr;
1730 
1731   PetscFunctionBegin;
1732   if (!a->rank) {
1733     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1734   }
1735   if (called) {PetscFunctionReturn(0);} else called = 1;
1736   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1737   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #undef __FUNC__
1742 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1743 int MatSetUnfactored_MPIBAIJ(Mat A)
1744 {
1745   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1746   int         ierr;
1747 
1748   PetscFunctionBegin;
1749   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1754 
1755 #undef __FUNC__
1756 #define __FUNC__ "MatEqual_MPIBAIJ"
1757 int MatEqual_MPIBAIJ(Mat A, Mat B, PetscTruth *flag)
1758 {
1759   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *) B->data,*matA = (Mat_MPIBAIJ *) A->data;
1760   Mat         a, b, c, d;
1761   PetscTruth  flg;
1762   int         ierr;
1763 
1764   PetscFunctionBegin;
1765   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1766   a = matA->A; b = matA->B;
1767   c = matB->A; d = matB->B;
1768 
1769   ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
1770   if (flg == PETSC_TRUE) {
1771     ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
1772   }
1773   ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 /* -------------------------------------------------------------------*/
/*
   Function (dispatch) table for the MPIBAIJ matrix type.  It is copied
   into B->ops by MatCreateMPIBAIJ() and MatDuplicate_MPIBAIJ() via
   PetscMemcpy().  Entries are positional and must stay in the slot order
   declared by struct _MatOps -- do not reorder.  A 0 entry means the
   corresponding Mat operation is not implemented for this format.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTrans_MPIBAIJ,
  MatMultTransAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatGetSize_MPIBAIJ,
  MatGetLocalSize_MPIBAIJ,
  MatGetOwnershipRange_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  0,
  0,
  MatGetMaps_Petsc};
1845 
1846 
1847 EXTERN_C_BEGIN
1848 #undef __FUNC__
1849 #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
1850 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1851 {
1852   PetscFunctionBegin;
1853   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1854   *iscopy = PETSC_FALSE;
1855   PetscFunctionReturn(0);
1856 }
1857 EXTERN_C_END
1858 
1859 #undef __FUNC__
1860 #define __FUNC__ "MatCreateMPIBAIJ"
1861 /*@C
1862    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1863    (block compressed row).  For good matrix assembly performance
1864    the user should preallocate the matrix storage by setting the parameters
1865    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1866    performance can be increased by more than a factor of 50.
1867 
1868    Collective on MPI_Comm
1869 
1870    Input Parameters:
1871 +  comm - MPI communicator
.  bs   - size of block
1873 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1874            This value should be the same as the local size used in creating the
1875            y vector for the matrix-vector product y = Ax.
1876 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1877            This value should be the same as the local size used in creating the
1878            x vector for the matrix-vector product y = Ax.
1879 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1880 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1881 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1882            submatrix  (same for all local rows)
1883 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
1885            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1886 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1887            submatrix (same for all local rows).
1888 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1889            off-diagonal portion of the local submatrix (possibly different for
1890            each block row) or PETSC_NULL.
1891 
1892    Output Parameter:
1893 .  A - the matrix
1894 
1895    Options Database Keys:
1896 .   -mat_no_unroll - uses code that does not unroll the loops in the
1897                      block calculations (much slower)
1898 .   -mat_block_size - size of the blocks to use
1899 .   -mat_mpi - use the parallel matrix data structures even on one processor
1900                (defaults to using SeqBAIJ format on one processor)
1901 
1902    Notes:
1903    The user MUST specify either the local or global matrix dimensions
1904    (possibly both).
1905 
1906    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1908 
1909    Storage Information:
1910    For a square global matrix we define each processor's diagonal portion
1911    to be its local rows and the corresponding columns (a square submatrix);
1912    each processor's off-diagonal portion encompasses the remainder of the
1913    local matrix (a rectangular submatrix).
1914 
1915    The user can specify preallocated storage for the diagonal part of
1916    the local submatrix with either d_nz or d_nnz (not both).  Set
1917    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1918    memory allocation.  Likewise, specify preallocated storage for the
1919    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1920 
1921    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1922    the figure below we depict these three local rows and all columns (0-11).
1923 
1924 .vb
1925            0 1 2 3 4 5 6 7 8 9 10 11
1926           -------------------
1927    row 3  |  o o o d d d o o o o o o
1928    row 4  |  o o o d d d o o o o o o
1929    row 5  |  o o o d d d o o o o o o
1930           -------------------
1931 .ve
1932 
1933    Thus, any entries in the d locations are stored in the d (diagonal)
1934    submatrix, and any entries in the o locations are stored in the
1935    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1936    stored simply in the MATSEQBAIJ format for compressed row storage.
1937 
1938    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1939    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1940    In general, for PDE problems in which most nonzeros are near the diagonal,
1941    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1942    or you will get TERRIBLE performance; see the users' manual chapter on
1943    matrices.
1944 
1945    Level: intermediate
1946 
1947 .keywords: matrix, block, aij, compressed row, sparse, parallel
1948 
1949 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1950 @*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  /* the runtime option -mat_block_size overrides the bs argument */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  /* on a single process fall back to the sequential BAIJ format unless
     the user explicitly requested the parallel data structures via
     -mat_mpibaij or -mat_mpi */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    /* only the diagonal preallocation is meaningful sequentially */
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  /* install the MPIBAIJ dispatch table, then patch the two entries that
     are not stored in MatOps_Values */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* sanity checks on the size arguments: per-row preallocation requires a
     known local row count, and each dimension needs either its local or
     its global size */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local size wins and
     the global size is recomputed below */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* sum the local sizes across the communicator to obtain the global sizes */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* derive local sizes from the global ones: distribute the block rows
     (columns) as evenly as possible, giving the first (Mbs % size)
     processes one extra block */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships; one allocation holds
     rowners, cowners and rowners_bs back to back */
  b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* gather every process's block-row count, then prefix-sum so that
     rowners[r]..rowners[r+1] is the block-row range owned by rank r */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  for ( i=0; i<=b->size; i++ ) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  /* same prefix-sum construction for the column ownership */
  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the two local pieces: A is the m x n diagonal block, B the
     m x N off-diagonal block (compressed later by MatSetUpMultiply) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  /* optional hash-table assembly; a factor <= 1.0 is replaced by the
     default 1.39 */
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the dynamically queried operations (see MatStoreValues(),
     MatRetrieveValues(), MatGetDiagonalBlock()) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2128 
2129 #undef __FUNC__
2130 #define __FUNC__ "MatDuplicate_MPIBAIJ"
2131 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2132 {
2133   Mat         mat;
2134   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2135   int         ierr, len=0, flg;
2136 
2137   PetscFunctionBegin;
2138   *newmat       = 0;
2139   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2140   PLogObjectCreate(mat);
2141   mat->data         = (void *) (a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2142   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2143   mat->ops->destroy = MatDestroy_MPIBAIJ;
2144   mat->ops->view    = MatView_MPIBAIJ;
2145   mat->factor       = matin->factor;
2146   mat->assembled    = PETSC_TRUE;
2147   mat->insertmode   = NOT_SET_VALUES;
2148 
2149   a->m = mat->m   = oldmat->m;
2150   a->n = mat->n   = oldmat->n;
2151   a->M = mat->M   = oldmat->M;
2152   a->N = mat->N   = oldmat->N;
2153 
2154   a->bs  = oldmat->bs;
2155   a->bs2 = oldmat->bs2;
2156   a->mbs = oldmat->mbs;
2157   a->nbs = oldmat->nbs;
2158   a->Mbs = oldmat->Mbs;
2159   a->Nbs = oldmat->Nbs;
2160 
2161   a->rstart       = oldmat->rstart;
2162   a->rend         = oldmat->rend;
2163   a->cstart       = oldmat->cstart;
2164   a->cend         = oldmat->cend;
2165   a->size         = oldmat->size;
2166   a->rank         = oldmat->rank;
2167   a->donotstash   = oldmat->donotstash;
2168   a->roworiented  = oldmat->roworiented;
2169   a->rowindices   = 0;
2170   a->rowvalues    = 0;
2171   a->getrowactive = PETSC_FALSE;
2172   a->barray       = 0;
2173   a->rstart_bs    = oldmat->rstart_bs;
2174   a->rend_bs      = oldmat->rend_bs;
2175   a->cstart_bs    = oldmat->cstart_bs;
2176   a->cend_bs      = oldmat->cend_bs;
2177 
2178   /* hash table stuff */
2179   a->ht           = 0;
2180   a->hd           = 0;
2181   a->ht_size      = 0;
2182   a->ht_flag      = oldmat->ht_flag;
2183   a->ht_fact      = oldmat->ht_fact;
2184   a->ht_total_ct  = 0;
2185   a->ht_insert_ct = 0;
2186 
2187 
2188   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2189   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2190   a->cowners    = a->rowners + a->size + 2;
2191   a->rowners_bs = a->cowners + a->size + 2;
2192   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2193   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2194   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2195   if (oldmat->colmap) {
2196 #if defined (PETSC_USE_CTABLE)
2197   ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2198 #else
2199     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2200     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2201     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2202 #endif
2203   } else a->colmap = 0;
2204   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2205     a->garray = (int *) PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2206     PLogObjectMemory(mat,len*sizeof(int));
2207     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2208   } else a->garray = 0;
2209 
2210   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2211   PLogObjectParent(mat,a->lvec);
2212   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2213 
2214   PLogObjectParent(mat,a->Mvctx);
2215   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2216   PLogObjectParent(mat,a->A);
2217   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2218   PLogObjectParent(mat,a->B);
2219   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2220   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2221   if (flg) {
2222     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2223   }
2224   *newmat = mat;
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #include "sys.h"
2229 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a matrix from a PETSc binary viewer into a new
   MPIBAIJ matrix.  Process 0 reads the file and ships each process its
   portion of the row lengths, column indices and values via MPI
   point-to-point messages.  If the global row count is not divisible by
   the block size (settable with -matload_block_size) the matrix is
   padded with extra rows/columns carrying a unit diagonal.  Only square
   matrices are supported.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; it validates the header first */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows: block rows are distributed evenly,
     the first (Mbs % size) processes getting one extra block row */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners = rowners + size + 1;
  /* prefix-sum of block-row counts; browners holds the same in point rows */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors (padding rows get length 1
     for their unit diagonal) */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) );CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
    mycols = ibuf;
    /* with one process the padding entries are not on disk; append the
       diagonal column indices of the padding rows by hand */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already seen within the current block row,
     masked1/masked2 record which mask entries to clear afterwards */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) );CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) );CHKPTRQ(mask);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix with exact preallocation from dlens/odlens */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    /* padding rows get an explicit 1.0 on the diagonal */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc (i == size-1 here after the loop above) */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2457 
2458 
2459 
2460 #undef __FUNC__
2461 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2462 /*@
2463    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2464 
2465    Input Parameters:
2466 .  mat  - the matrix
2467 .  fact - factor
2468 
2469    Collective on Mat
2470 
2471    Level: advanced
2472 
2473   Notes:
2474    This can also be set by the command line option: -mat_use_hash_table fact
2475 
2476 .keywords: matrix, hashtable, factor, HT
2477 
2478 .seealso: MatSetOption()
2479 @*/
2480 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2481 {
2482   Mat_MPIBAIJ *baij;
2483 
2484   PetscFunctionBegin;
2485   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2486   if (mat->type != MATMPIBAIJ) {
2487       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2488   }
2489   baij = (Mat_MPIBAIJ*) mat->data;
2490   baij->ht_fact = fact;
2491   PetscFunctionReturn(0);
2492 }
2493