xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c3bdd49e83cbc3c4e928c5d2d8c94c628bcd6a06)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.126 1998/05/29 22:50:16 balay Exp balay $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
15 
16 /*
17      Local utility routine that creates a mapping from the global column
18    number to the local number in the off-diagonal part of the local
19    storage of the matrix.  This is done in a non scable way since the
20    length of colmap equals the global matrix length.
21 */
22 #undef __FUNC__
23 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
24 static int CreateColmap_MPIBAIJ_Private(Mat mat)
25 {
26   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
28   int         nbs = B->nbs,i,bs=B->bs;;
29 
30   PetscFunctionBegin;
31   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
32   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
33   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
34   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
35   PetscFunctionReturn(0);
36 }
37 
38 #define CHUNKSIZE  10
39 
40 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
41 { \
42  \
43     brow = row/bs;  \
44     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
45     rmax = aimax[brow]; nrow = ailen[brow]; \
46       bcol = col/bs; \
47       ridx = row % bs; cidx = col % bs; \
48       low = 0; high = nrow; \
49       while (high-low > 3) { \
50         t = (low+high)/2; \
51         if (rp[t] > bcol) high = t; \
52         else              low  = t; \
53       } \
54       for ( _i=low; _i<high; _i++ ) { \
55         if (rp[_i] > bcol) break; \
56         if (rp[_i] == bcol) { \
57           bap  = ap +  bs2*_i + bs*cidx + ridx; \
58           if (addv == ADD_VALUES) *bap += value;  \
59           else                    *bap  = value;  \
60           goto a_noinsert; \
61         } \
62       } \
63       if (a->nonew == 1) goto a_noinsert; \
64       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
65       if (nrow >= rmax) { \
66         /* there is no extra room in row, therefore enlarge */ \
67         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
68         Scalar *new_a; \
69  \
70         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
71  \
72         /* malloc new storage space */ \
73         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
74         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
75         new_j   = (int *) (new_a + bs2*new_nz); \
76         new_i   = new_j + new_nz; \
77  \
78         /* copy over old data into new slots */ \
79         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
80         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
81         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
82         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
83         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
84                                                            len*sizeof(int)); \
85         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
86         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
87         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
88                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
89         /* free up old matrix storage */ \
90         PetscFree(a->a);  \
91         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
92         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
93         a->singlemalloc = 1; \
94  \
95         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
96         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
97         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
98         a->maxnz += bs2*CHUNKSIZE; \
99         a->reallocs++; \
100         a->nz++; \
101       } \
102       N = nrow++ - 1;  \
103       /* shift up all the later entries in this row */ \
104       for ( ii=N; ii>=_i; ii-- ) { \
105         rp[ii+1] = rp[ii]; \
106         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
107       } \
108       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
109       rp[_i]                      = bcol;  \
110       ap[bs2*_i + bs*cidx + ridx] = value;  \
111       a_noinsert:; \
112     ailen[brow] = nrow; \
113 }
114 
115 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
116 { \
117  \
118     brow = row/bs;  \
119     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
120     rmax = bimax[brow]; nrow = bilen[brow]; \
121       bcol = col/bs; \
122       ridx = row % bs; cidx = col % bs; \
123       low = 0; high = nrow; \
124       while (high-low > 3) { \
125         t = (low+high)/2; \
126         if (rp[t] > bcol) high = t; \
127         else              low  = t; \
128       } \
129       for ( _i=low; _i<high; _i++ ) { \
130         if (rp[_i] > bcol) break; \
131         if (rp[_i] == bcol) { \
132           bap  = ap +  bs2*_i + bs*cidx + ridx; \
133           if (addv == ADD_VALUES) *bap += value;  \
134           else                    *bap  = value;  \
135           goto b_noinsert; \
136         } \
137       } \
138       if (b->nonew == 1) goto b_noinsert; \
139       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
140       if (nrow >= rmax) { \
141         /* there is no extra room in row, therefore enlarge */ \
142         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
143         Scalar *new_a; \
144  \
145         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
146  \
147         /* malloc new storage space */ \
148         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
149         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
150         new_j   = (int *) (new_a + bs2*new_nz); \
151         new_i   = new_j + new_nz; \
152  \
153         /* copy over old data into new slots */ \
154         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
155         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
156         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
157         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
158         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
159                                                            len*sizeof(int)); \
160         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
161         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
162         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
163                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
164         /* free up old matrix storage */ \
165         PetscFree(b->a);  \
166         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
167         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
168         b->singlemalloc = 1; \
169  \
170         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
171         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
172         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
173         b->maxnz += bs2*CHUNKSIZE; \
174         b->reallocs++; \
175         b->nz++; \
176       } \
177       N = nrow++ - 1;  \
178       /* shift up all the later entries in this row */ \
179       for ( ii=N; ii>=_i; ii-- ) { \
180         rp[ii+1] = rp[ii]; \
181         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
182       } \
183       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
184       rp[_i]                      = bcol;  \
185       ap[bs2*_i + bs*cidx + ridx] = value;  \
186       b_noinsert:; \
187     bilen[brow] = nrow; \
188 }
189 
190 #undef __FUNC__
191 #define __FUNC__ "MatSetValues_MPIBAIJ"
192 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
193 {
194   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
195   Scalar      value;
196   int         ierr,i,j,row,col;
197   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
198   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
199   int         cend_orig=baij->cend_bs,bs=baij->bs;
200 
201   /* Some Variables required in the macro */
202   Mat         A = baij->A;
203   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
204   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
205   Scalar      *aa=a->a;
206 
207   Mat         B = baij->B;
208   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
209   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
210   Scalar      *ba=b->a;
211 
212   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
213   int         low,high,t,ridx,cidx,bs2=a->bs2;
214   Scalar      *ap,*bap;
215 
216   PetscFunctionBegin;
217   for ( i=0; i<m; i++ ) {
218 #if defined(USE_PETSC_BOPT_g)
219     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
220     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
221 #endif
222     if (im[i] >= rstart_orig && im[i] < rend_orig) {
223       row = im[i] - rstart_orig;
224       for ( j=0; j<n; j++ ) {
225         if (in[j] >= cstart_orig && in[j] < cend_orig){
226           col = in[j] - cstart_orig;
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231 #if defined(USE_PETSC_BOPT_g)
232         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
233         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
241             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
242               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
243               col =  in[j];
244               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
245               B = baij->B;
246               b = (Mat_SeqBAIJ *) (B)->data;
247               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
248               ba=b->a;
249             }
250           } else col = in[j];
251           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
252           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (roworiented && !baij->donotstash) {
258         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
259       } else {
260         if (!baij->donotstash) {
261           row = im[i];
262 	  for ( j=0; j<n; j++ ) {
263 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
264           }
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
271 
272 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
273 #undef __FUNC__
274 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
275 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
276 {
277   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
278   Scalar      *value,*barray=baij->barray;
279   int         ierr,i,j,ii,jj,row,col,k,l;
280   int         roworiented = baij->roworiented,rstart=baij->rstart ;
281   int         rend=baij->rend,cstart=baij->cstart,stepval;
282   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
283 
284   if(!barray) {
285     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
286   }
287 
288   if (roworiented) {
289     stepval = (n-1)*bs;
290   } else {
291     stepval = (m-1)*bs;
292   }
293   for ( i=0; i<m; i++ ) {
294 #if defined(USE_PETSC_BOPT_g)
295     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
296     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
297 #endif
298     if (im[i] >= rstart && im[i] < rend) {
299       row = im[i] - rstart;
300       for ( j=0; j<n; j++ ) {
301         /* If NumCol = 1 then a copy is not required */
302         if ((roworiented) && (n == 1)) {
303           barray = v + i*bs2;
304         } else if((!roworiented) && (m == 1)) {
305           barray = v + j*bs2;
306         } else { /* Here a copy is required */
307           if (roworiented) {
308             value = v + i*(stepval+bs)*bs + j*bs;
309           } else {
310             value = v + j*(stepval+bs)*bs + i*bs;
311           }
312           for ( ii=0; ii<bs; ii++,value+=stepval ) {
313             for (jj=0; jj<bs; jj++ ) {
314               *barray++  = *value++;
315             }
316           }
317           barray -=bs2;
318         }
319 
320         if (in[j] >= cstart && in[j] < cend){
321           col  = in[j] - cstart;
322           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
323         }
324 #if defined(USE_PETSC_BOPT_g)
325         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
326         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
327 #endif
328         else {
329           if (mat->was_assembled) {
330             if (!baij->colmap) {
331               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
332             }
333 
334 #if defined(USE_PETSC_BOPT_g)
335             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
336 #endif
337             col = (baij->colmap[in[j]] - 1)/bs;
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (!baij->donotstash) {
349         if (roworiented ) {
350           row   = im[i]*bs;
351           value = v + i*(stepval+bs)*bs;
352           for ( j=0; j<bs; j++,row++ ) {
353             for ( k=0; k<n; k++ ) {
354               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
355                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
356               }
357             }
358           }
359         } else {
360           for ( j=0; j<n; j++ ) {
361             value = v + j*(stepval+bs)*bs + i*bs;
362             col   = in[j]*bs;
363             for ( k=0; k<bs; k++,col++,value+=stepval) {
364               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
365                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
366               }
367             }
368           }
369         }
370       }
371     }
372   }
373   PetscFunctionReturn(0);
374 }
375 #include <math.h>
376 #define HASH_KEY 0.6180339887
377 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
379 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
380 #undef __FUNC__
381 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
382 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
383 {
384   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
385   int         ierr,i,j,row,col;
386   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
387   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
388   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
389   double      tmp;
390   Scalar      ** HD = baij->hd,value;
391 #if defined(USE_PETSC_BOPT_g)
392   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
393 #endif
394 
395   PetscFunctionBegin;
396 
397   for ( i=0; i<m; i++ ) {
398 #if defined(USE_PETSC_BOPT_g)
399     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
400     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
401 #endif
402       row = im[i];
403     if (row >= rstart_orig && row < rend_orig) {
404       for ( j=0; j<n; j++ ) {
405         col = in[j];
406         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
407         /* Look up into the Hash Table */
408         key = (row/bs)*Nbs+(col/bs)+1;
409         h1  = HASH(size,key,tmp);
410 
411 
412         idx = h1;
413 #if defined(USE_PETSC_BOPT_g)
414         insert_ct++;
415         total_ct++;
416         if (HT[idx] != key) {
417           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
418           if (idx == size) {
419             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
420             if (idx == h1) {
421               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
422             }
423           }
424         }
425 #else
426         if (HT[idx] != key) {
427           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
428           if (idx == size) {
429             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
430             if (idx == h1) {
431               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
432             }
433           }
434         }
435 #endif
436         /* A HASH table entry is found, so insert the values at the correct address */
437         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
438         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
439       }
440     } else {
441       if (roworiented && !baij->donotstash) {
442         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
443       } else {
444         if (!baij->donotstash) {
445           row = im[i];
446 	  for ( j=0; j<n; j++ ) {
447 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
448           }
449         }
450       }
451     }
452   }
453 #if defined(USE_PETSC_BOPT_g)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNC__
461 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
463 {
464   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
465   int         ierr,i,j,ii,jj,row,col,k,l;
466   int         roworiented = baij->roworiented,rstart=baij->rstart ;
467   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
468   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
469   double      tmp;
470   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
471 #if defined(USE_PETSC_BOPT_g)
472   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
473 #endif
474 
475   PetscFunctionBegin;
476 
477   if (roworiented) {
478     stepval = (n-1)*bs;
479   } else {
480     stepval = (m-1)*bs;
481   }
482   for ( i=0; i<m; i++ ) {
483 #if defined(USE_PETSC_BOPT_g)
484     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
485     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
486 #endif
487     row   = im[i];
488     v_t   = v + i*bs2;
489     if (row >= rstart && row < rend) {
490       for ( j=0; j<n; j++ ) {
491         col = in[j];
492 
493         /* Look up into the Hash Table */
494         key = row*Nbs+col+1;
495         h1  = HASH(size,key,tmp);
496 
497         idx = h1;
498 #if defined(USE_PETSC_BOPT_g)
499         total_ct++;
500         insert_ct++;
501        if (HT[idx] != key) {
502           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
503           if (idx == size) {
504             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
505             if (idx == h1) {
506               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
507             }
508           }
509         }
510 #else
511         if (HT[idx] != key) {
512           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
513           if (idx == size) {
514             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
515             if (idx == h1) {
516               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
517             }
518           }
519         }
520 #endif
521         baij_a = HD[idx];
522         if (roworiented) {
523           /*value = v + i*(stepval+bs)*bs + j*bs;*/
524           /* value = v + (i*(stepval+bs)+j)*bs; */
525           value = v_t;
526           v_t  += bs;
527           if (addv == ADD_VALUES) {
528             for ( ii=0; ii<bs; ii++,value+=stepval) {
529               for ( jj=ii; jj<bs2; jj+=bs ) {
530                 baij_a[jj]  += *value++;
531               }
532             }
533           } else {
534             for ( ii=0; ii<bs; ii++,value+=stepval) {
535               for ( jj=ii; jj<bs2; jj+=bs ) {
536                 baij_a[jj]  = *value++;
537               }
538             }
539           }
540         } else {
541           value = v + j*(stepval+bs)*bs + i*bs;
542           if (addv == ADD_VALUES) {
543             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
544               for ( jj=0; jj<bs; jj++ ) {
545                 baij_a[jj]  += *value++;
546               }
547             }
548           } else {
549             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
550               for ( jj=0; jj<bs; jj++ ) {
551                 baij_a[jj]  = *value++;
552               }
553             }
554           }
555         }
556       }
557     } else {
558       if (!baij->donotstash) {
559         if (roworiented ) {
560           row   = im[i]*bs;
561           value = v + i*(stepval+bs)*bs;
562           for ( j=0; j<bs; j++,row++ ) {
563             for ( k=0; k<n; k++ ) {
564               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
565                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
566               }
567             }
568           }
569         } else {
570           for ( j=0; j<n; j++ ) {
571             value = v + j*(stepval+bs)*bs + i*bs;
572             col   = in[j]*bs;
573             for ( k=0; k<bs; k++,col++,value+=stepval) {
574               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
575                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
576               }
577             }
578           }
579         }
580       }
581     }
582   }
583 #if defined(USE_PETSC_BOPT_g)
584   baij->ht_total_ct = total_ct;
585   baij->ht_insert_ct = insert_ct;
586 #endif
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNC__
591 #define __FUNC__ "MatGetValues_MPIBAIJ"
592 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
593 {
594   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
595   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
596   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
597 
598   PetscFunctionBegin;
599   for ( i=0; i<m; i++ ) {
600     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
601     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
602     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
603       row = idxm[i] - bsrstart;
604       for ( j=0; j<n; j++ ) {
605         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
606         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
607         if (idxn[j] >= bscstart && idxn[j] < bscend){
608           col = idxn[j] - bscstart;
609           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
610         } else {
611           if (!baij->colmap) {
612             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
613           }
614           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
615              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
616           else {
617             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
618             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
619           }
620         }
621       }
622     } else {
623       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
624     }
625   }
626  PetscFunctionReturn(0);
627 }
628 
629 #undef __FUNC__
630 #define __FUNC__ "MatNorm_MPIBAIJ"
631 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
632 {
633   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
634   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
635   int        ierr, i,bs2=baij->bs2;
636   double     sum = 0.0;
637   Scalar     *v;
638 
639   PetscFunctionBegin;
640   if (baij->size == 1) {
641     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
642   } else {
643     if (type == NORM_FROBENIUS) {
644       v = amat->a;
645       for (i=0; i<amat->nz*bs2; i++ ) {
646 #if defined(USE_PETSC_COMPLEX)
647         sum += PetscReal(PetscConj(*v)*(*v)); v++;
648 #else
649         sum += (*v)*(*v); v++;
650 #endif
651       }
652       v = bmat->a;
653       for (i=0; i<bmat->nz*bs2; i++ ) {
654 #if defined(USE_PETSC_COMPLEX)
655         sum += PetscReal(PetscConj(*v)*(*v)); v++;
656 #else
657         sum += (*v)*(*v); v++;
658 #endif
659       }
660       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
661       *norm = sqrt(*norm);
662     } else {
663       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
664     }
665   }
666   PetscFunctionReturn(0);
667 }
668 
669 #undef __FUNC__
670 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
671 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
672 {
673   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
674   MPI_Comm    comm = mat->comm;
675   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
676   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
677   MPI_Request *send_waits,*recv_waits;
678   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
679   InsertMode  addv;
680   Scalar      *rvalues,*svalues;
681 
682   PetscFunctionBegin;
683   if (baij->donotstash) {
684     baij->svalues    = 0; baij->rvalues    = 0;
685     baij->nsends     = 0; baij->nrecvs     = 0;
686     baij->send_waits = 0; baij->recv_waits = 0;
687     baij->rmax       = 0;
688     PetscFunctionReturn(0);
689   }
690 
691   /* make sure all processors are either in INSERTMODE or ADDMODE */
692   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
693   if (addv == (ADD_VALUES|INSERT_VALUES)) {
694     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
695   }
696   mat->insertmode = addv; /* in case this processor had no cache */
697 
698   /*  first count number of contributors to each processor */
699   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
700   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
701   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
702   for ( i=0; i<baij->stash.n; i++ ) {
703     idx = baij->stash.idx[i];
704     for ( j=0; j<size; j++ ) {
705       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
706         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
707       }
708     }
709   }
710   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
711 
712   /* inform other processors of number of messages and max length*/
713   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
714   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
715   nreceives = work[rank];
716   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
717   nmax      = work[rank];
718   PetscFree(work);
719 
720   /* post receives:
721        1) each message will consist of ordered pairs
722      (global index,value) we store the global index as a double
723      to simplify the message passing.
724        2) since we don't know how long each individual message is we
725      allocate the largest needed buffer for each receive. Potentially
726      this is a lot of wasted space.
727 
728 
729        This could be done better.
730   */
731   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
732   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
733   for ( i=0; i<nreceives; i++ ) {
734     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
735   }
736 
737   /* do sends:
738       1) starts[i] gives the starting index in svalues for stuff going to
739          the ith processor
740   */
741   svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
742   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
743   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
744   starts[0] = 0;
745   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
746   for ( i=0; i<baij->stash.n; i++ ) {
747     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
748     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
749     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
750   }
751   PetscFree(owner);
752   starts[0] = 0;
753   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
754   count = 0;
755   for ( i=0; i<size; i++ ) {
756     if (procs[i]) {
757       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
758     }
759   }
760   PetscFree(starts); PetscFree(nprocs);
761 
762   /* Free cache space */
763   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
764   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
765 
766   baij->svalues    = svalues;    baij->rvalues    = rvalues;
767   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
768   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
769   baij->rmax       = nmax;
770 
771   PetscFunctionReturn(0);
772 }
773 
774 /*
775   Creates the hash table, and sets the table
776   This table is created only once.
777   If new entried need to be added to the matrix
778   then the hash table has to be destroyed and
779   recreated.
780 */
781 #undef __FUNC__
782 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
783 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
784 {
785   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
786   Mat         A = baij->A, B=baij->B;
787   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
788   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
789   int         size,bs2=baij->bs2,rstart=baij->rstart;
790   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
791   int         *HT,key;
792   Scalar      **HD;
793   double      tmp;
794 #if defined(USE_PETSC_BOPT_g)
795   int         ct=0,max=0;
796 #endif
797 
798   PetscFunctionBegin;
799   baij->ht_size=(int)(factor*nz);
800   size = baij->ht_size;
801 
802   if (baij->ht) {
803     PetscFunctionReturn(0);
804   }
805 
806   /* Allocate Memory for Hash Table */
807   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
808   baij->ht = (int*)(baij->hd + size);
809   HD = baij->hd;
810   HT = baij->ht;
811 
812 
813   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
814 
815 
816   /* Loop Over A */
817   for ( i=0; i<a->mbs; i++ ) {
818     for ( j=ai[i]; j<ai[i+1]; j++ ) {
819       row = i+rstart;
820       col = aj[j]+cstart;
821 
822       key = row*Nbs + col + 1;
823       h1  = HASH(size,key,tmp);
824       for ( k=0; k<size; k++ ){
825         if (HT[(h1+k)%size] == 0.0) {
826           HT[(h1+k)%size] = key;
827           HD[(h1+k)%size] = a->a + j*bs2;
828           break;
829 #if defined(USE_PETSC_BOPT_g)
830         } else {
831           ct++;
832 #endif
833         }
834       }
835 #if defined(USE_PETSC_BOPT_g)
836       if (k> max) max = k;
837 #endif
838     }
839   }
840   /* Loop Over B */
841   for ( i=0; i<b->mbs; i++ ) {
842     for ( j=bi[i]; j<bi[i+1]; j++ ) {
843       row = i+rstart;
844       col = garray[bj[j]];
845       key = row*Nbs + col + 1;
846       h1  = HASH(size,key,tmp);
847       for ( k=0; k<size; k++ ){
848         if (HT[(h1+k)%size] == 0.0) {
849           HT[(h1+k)%size] = key;
850           HD[(h1+k)%size] = b->a + j*bs2;
851           break;
852 #if defined(USE_PETSC_BOPT_g)
853         } else {
854           ct++;
855 #endif
856         }
857       }
858 #if defined(USE_PETSC_BOPT_g)
859       if (k> max) max = k;
860 #endif
861     }
862   }
863 
864   /* Print Summary */
865 #if defined(USE_PETSC_BOPT_g)
866   for ( i=0,j=0; i<size; i++)
867     if (HT[i]) {j++;}
868   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
869            (j== 0)? 0.0:((double)(ct+j))/j,max);
870 #endif
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNC__
875 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
876 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
877 {
878   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
879   MPI_Status  *send_status,recv_status;
880   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
881   int         bs=baij->bs,row,col,other_disassembled;
882   Scalar      *values,val;
883   InsertMode  addv = mat->insertmode;
884 
885   PetscFunctionBegin;
886   /*  wait on receives */
887   while (count) {
888     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
889     /* unpack receives into our local space */
890     values = baij->rvalues + 3*imdex*baij->rmax;
891     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
892     n = n/3;
893     for ( i=0; i<n; i++ ) {
894       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
895       col = (int) PetscReal(values[3*i+1]);
896       val = values[3*i+2];
897       if (col >= baij->cstart*bs && col < baij->cend*bs) {
898         col -= baij->cstart*bs;
899         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
900       } else {
901         if (mat->was_assembled) {
902           if (!baij->colmap) {
903             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
904           }
905           col = (baij->colmap[col/bs]) - 1 + col%bs;
906           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
907             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
908             col = (int) PetscReal(values[3*i+1]);
909           }
910         }
911         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
912       }
913     }
914     count--;
915   }
916   if (baij->recv_waits) PetscFree(baij->recv_waits);
917   if (baij->rvalues)    PetscFree(baij->rvalues);
918 
919   /* wait on sends */
920   if (baij->nsends) {
921     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
922     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
923     PetscFree(send_status);
924   }
925   if (baij->send_waits) PetscFree(baij->send_waits);
926   if (baij->svalues)    PetscFree(baij->svalues);
927 
928   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
930 
931   /* determine if any processor has disassembled, if so we must
932      also disassemble ourselfs, in order that we may reassemble. */
933   /*
934      if nonzero structure of submatrix B cannot change then we know that
935      no processor disassembled thus we can skip this stuff
936   */
937   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
938     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
939     if (mat->was_assembled && !other_disassembled) {
940       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
941     }
942   }
943 
944   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
945     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
946   }
947   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
948   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
949 
950 #if defined(USE_PETSC_BOPT_g)
951   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
952     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
953              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
954     baij->ht_total_ct  = 0;
955     baij->ht_insert_ct = 0;
956   }
957 #endif
958   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
959     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
960     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
961     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
962   }
963 
964   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNC__
969 #define __FUNC__ "MatView_MPIBAIJ_Binary"
970 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
971 {
972   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
973   int          ierr;
974 
975   PetscFunctionBegin;
976   if (baij->size == 1) {
977     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
978   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNC__
983 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
984 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
985 {
986   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
987   int          ierr, format,rank,bs = baij->bs;
988   FILE         *fd;
989   ViewerType   vtype;
990 
991   PetscFunctionBegin;
992   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
993   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
994     ierr = ViewerGetFormat(viewer,&format);
995     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
996       MatInfo info;
997       MPI_Comm_rank(mat->comm,&rank);
998       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
999       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
1000       PetscSequentialPhaseBegin(mat->comm,1);
1001       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1002               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1003               baij->bs,(int)info.memory);
1004       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
1005       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1006       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
1007       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1008       fflush(fd);
1009       PetscSequentialPhaseEnd(mat->comm,1);
1010       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
1011       PetscFunctionReturn(0);
1012     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1013       PetscPrintf(mat->comm,"  block size is %d\n",bs);
1014       PetscFunctionReturn(0);
1015     }
1016   }
1017 
1018   if (vtype == DRAW_VIEWER) {
1019     Draw       draw;
1020     PetscTruth isnull;
1021     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
1022     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1023   }
1024 
1025   if (vtype == ASCII_FILE_VIEWER) {
1026     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1027     PetscSequentialPhaseBegin(mat->comm,1);
1028     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
1029            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
1030             baij->cstart*bs,baij->cend*bs);
1031     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1032     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
1033     fflush(fd);
1034     PetscSequentialPhaseEnd(mat->comm,1);
1035   } else {
1036     int size = baij->size;
1037     rank = baij->rank;
1038     if (size == 1) {
1039       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1040     } else {
1041       /* assemble the entire matrix onto first processor. */
1042       Mat         A;
1043       Mat_SeqBAIJ *Aloc;
1044       int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
1045       int         mbs=baij->mbs;
1046       Scalar      *a;
1047 
1048       if (!rank) {
1049         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1050       } else {
1051         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1052       }
1053       PLogObjectParent(mat,A);
1054 
1055       /* copy over the A part */
1056       Aloc = (Mat_SeqBAIJ*) baij->A->data;
1057       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1058       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1059 
1060       for ( i=0; i<mbs; i++ ) {
1061         rvals[0] = bs*(baij->rstart + i);
1062         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1063         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1064           col = (baij->cstart+aj[j])*bs;
1065           for (k=0; k<bs; k++ ) {
1066             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1067             col++; a += bs;
1068           }
1069         }
1070       }
1071       /* copy over the B part */
1072       Aloc = (Mat_SeqBAIJ*) baij->B->data;
1073       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1074       for ( i=0; i<mbs; i++ ) {
1075         rvals[0] = bs*(baij->rstart + i);
1076         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1077         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1078           col = baij->garray[aj[j]]*bs;
1079           for (k=0; k<bs; k++ ) {
1080             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1081             col++; a += bs;
1082           }
1083         }
1084       }
1085       PetscFree(rvals);
1086       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1087       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1088       /*
1089          Everyone has to call to draw the matrix since the graphics waits are
1090          synchronized across all processors that share the Draw object
1091       */
1092       if (!rank || vtype == DRAW_VIEWER) {
1093         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1094       }
1095       ierr = MatDestroy(A); CHKERRQ(ierr);
1096     }
1097   }
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 
1102 
1103 #undef __FUNC__
1104 #define __FUNC__ "MatView_MPIBAIJ"
1105 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1106 {
1107   int         ierr;
1108   ViewerType  vtype;
1109 
1110   PetscFunctionBegin;
1111   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1112   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1113       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1114     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1115   } else if (vtype == BINARY_FILE_VIEWER) {
1116     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1117   } else {
1118     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNC__
1124 #define __FUNC__ "MatDestroy_MPIBAIJ"
1125 int MatDestroy_MPIBAIJ(Mat mat)
1126 {
1127   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1128   int         ierr;
1129 
1130   PetscFunctionBegin;
1131   if (--mat->refct > 0) PetscFunctionReturn(0);
1132 
1133   if (mat->mapping) {
1134     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
1135   }
1136   if (mat->bmapping) {
1137     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
1138   }
1139 #if defined(USE_PETSC_LOG)
1140   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1141 #endif
1142 
1143   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1144   PetscFree(baij->rowners);
1145   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1146   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1147   if (baij->colmap) PetscFree(baij->colmap);
1148   if (baij->garray) PetscFree(baij->garray);
1149   if (baij->lvec)   VecDestroy(baij->lvec);
1150   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1151   if (baij->rowvalues) PetscFree(baij->rowvalues);
1152   if (baij->barray) PetscFree(baij->barray);
1153   if (baij->hd) PetscFree(baij->hd);
1154   PetscFree(baij);
1155   PLogObjectDestroy(mat);
1156   PetscHeaderDestroy(mat);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNC__
1161 #define __FUNC__ "MatMult_MPIBAIJ"
1162 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1163 {
1164   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1165   int         ierr, nt;
1166 
1167   PetscFunctionBegin;
1168   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1169   if (nt != a->n) {
1170     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1171   }
1172   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1173   if (nt != a->m) {
1174     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1175   }
1176   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1177   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1178   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1179   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1180   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNC__
1185 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1186 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1187 {
1188   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1189   int        ierr;
1190 
1191   PetscFunctionBegin;
1192   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1193   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1194   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1195   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNC__
1200 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1201 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1202 {
1203   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1204   int         ierr;
1205 
1206   PetscFunctionBegin;
1207   /* do nondiagonal part */
1208   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1209   /* send it on its way */
1210   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1211   /* do local part */
1212   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1213   /* receive remote parts: note this assumes the values are not actually */
1214   /* inserted in yy until the next line, which is true for my implementation*/
1215   /* but is not perhaps always true. */
1216   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 #undef __FUNC__
1221 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1222 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1223 {
1224   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1225   int         ierr;
1226 
1227   PetscFunctionBegin;
1228   /* do nondiagonal part */
1229   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1230   /* send it on its way */
1231   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1232   /* do local part */
1233   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1234   /* receive remote parts: note this assumes the values are not actually */
1235   /* inserted in yy until the next line, which is true for my implementation*/
1236   /* but is not perhaps always true. */
1237   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 /*
1242   This only works correctly for square matrices where the subblock A->A is the
1243    diagonal block
1244 */
1245 #undef __FUNC__
1246 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1247 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1248 {
1249   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1250   int         ierr;
1251 
1252   PetscFunctionBegin;
1253   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1254   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNC__
1259 #define __FUNC__ "MatScale_MPIBAIJ"
1260 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1261 {
1262   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1263   int         ierr;
1264 
1265   PetscFunctionBegin;
1266   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1267   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNC__
1272 #define __FUNC__ "MatGetSize_MPIBAIJ"
1273 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1274 {
1275   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1276 
1277   PetscFunctionBegin;
1278   if (m) *m = mat->M;
1279   if (n) *n = mat->N;
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNC__
1284 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1285 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1286 {
1287   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1288 
1289   PetscFunctionBegin;
1290   *m = mat->m; *n = mat->n;
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNC__
1295 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1296 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1297 {
1298   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1299 
1300   PetscFunctionBegin;
1301   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1306 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1307 
1308 #undef __FUNC__
1309 #define __FUNC__ "MatGetRow_MPIBAIJ"
1310 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1311 {
1312   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1313   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1314   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1315   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1316   int        *cmap, *idx_p,cstart = mat->cstart;
1317 
1318   PetscFunctionBegin;
1319   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1320   mat->getrowactive = PETSC_TRUE;
1321 
1322   if (!mat->rowvalues && (idx || v)) {
1323     /*
1324         allocate enough space to hold information from the longest row.
1325     */
1326     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1327     int     max = 1,mbs = mat->mbs,tmp;
1328     for ( i=0; i<mbs; i++ ) {
1329       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1330       if (max < tmp) { max = tmp; }
1331     }
1332     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1333     CHKPTRQ(mat->rowvalues);
1334     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1335   }
1336 
1337   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1338   lrow = row - brstart;
1339 
1340   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1341   if (!v)   {pvA = 0; pvB = 0;}
1342   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1343   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1344   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1345   nztot = nzA + nzB;
1346 
1347   cmap  = mat->garray;
1348   if (v  || idx) {
1349     if (nztot) {
1350       /* Sort by increasing column numbers, assuming A and B already sorted */
1351       int imark = -1;
1352       if (v) {
1353         *v = v_p = mat->rowvalues;
1354         for ( i=0; i<nzB; i++ ) {
1355           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1356           else break;
1357         }
1358         imark = i;
1359         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1360         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1361       }
1362       if (idx) {
1363         *idx = idx_p = mat->rowindices;
1364         if (imark > -1) {
1365           for ( i=0; i<imark; i++ ) {
1366             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1367           }
1368         } else {
1369           for ( i=0; i<nzB; i++ ) {
1370             if (cmap[cworkB[i]/bs] < cstart)
1371               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1372             else break;
1373           }
1374           imark = i;
1375         }
1376         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1377         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1378       }
1379     } else {
1380       if (idx) *idx = 0;
1381       if (v)   *v   = 0;
1382     }
1383   }
1384   *nz = nztot;
1385   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1386   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #undef __FUNC__
1391 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1392 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1393 {
1394   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1395 
1396   PetscFunctionBegin;
1397   if (baij->getrowactive == PETSC_FALSE) {
1398     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1399   }
1400   baij->getrowactive = PETSC_FALSE;
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNC__
1405 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1406 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1407 {
1408   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1409 
1410   PetscFunctionBegin;
1411   *bs = baij->bs;
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNC__
1416 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1417 int MatZeroEntries_MPIBAIJ(Mat A)
1418 {
1419   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1420   int         ierr;
1421 
1422   PetscFunctionBegin;
1423   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1424   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNC__
1429 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1430 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1431 {
1432   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1433   Mat         A = a->A, B = a->B;
1434   int         ierr;
1435   double      isend[5], irecv[5];
1436 
1437   PetscFunctionBegin;
1438   info->block_size     = (double)a->bs;
1439   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1440   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->memory;
1441   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1442   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->memory;
1443   if (flag == MAT_LOCAL) {
1444     info->nz_used      = isend[0];
1445     info->nz_allocated = isend[1];
1446     info->nz_unneeded  = isend[2];
1447     info->memory       = isend[3];
1448     info->mallocs      = isend[4];
1449   } else if (flag == MAT_GLOBAL_MAX) {
1450     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1451     info->nz_used      = irecv[0];
1452     info->nz_allocated = irecv[1];
1453     info->nz_unneeded  = irecv[2];
1454     info->memory       = irecv[3];
1455     info->mallocs      = irecv[4];
1456   } else if (flag == MAT_GLOBAL_SUM) {
1457     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1458     info->nz_used      = irecv[0];
1459     info->nz_allocated = irecv[1];
1460     info->nz_unneeded  = irecv[2];
1461     info->memory       = irecv[3];
1462     info->mallocs      = irecv[4];
1463   }
1464   info->rows_global       = (double)a->M;
1465   info->columns_global    = (double)a->N;
1466   info->rows_local        = (double)a->m;
1467   info->columns_local     = (double)a->N;
1468   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1469   info->fill_ratio_needed = 0;
1470   info->factor_mallocs    = 0;
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNC__
1475 #define __FUNC__ "MatSetOption_MPIBAIJ"
1476 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1477 {
1478   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1479 
1480   PetscFunctionBegin;
1481   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1482       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1483       op == MAT_COLUMNS_UNSORTED ||
1484       op == MAT_COLUMNS_SORTED ||
1485       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1486       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1487         MatSetOption(a->A,op);
1488         MatSetOption(a->B,op);
1489   } else if (op == MAT_ROW_ORIENTED) {
1490         a->roworiented = 1;
1491         MatSetOption(a->A,op);
1492         MatSetOption(a->B,op);
1493   } else if (op == MAT_ROWS_SORTED ||
1494              op == MAT_ROWS_UNSORTED ||
1495              op == MAT_SYMMETRIC ||
1496              op == MAT_STRUCTURALLY_SYMMETRIC ||
1497              op == MAT_YES_NEW_DIAGONALS ||
1498              op == MAT_USE_HASH_TABLE)
1499     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1500   else if (op == MAT_COLUMN_ORIENTED) {
1501     a->roworiented = 0;
1502     MatSetOption(a->A,op);
1503     MatSetOption(a->B,op);
1504   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1505     a->donotstash = 1;
1506   } else if (op == MAT_NO_NEW_DIAGONALS) {
1507     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1508   } else if (op == MAT_USE_HASH_TABLE) {
1509     a->ht_flag = 1;
1510   } else {
1511     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNC__
1517 #define __FUNC__ "MatTranspose_MPIBAIJ("
1518 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1519 {
1520   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1521   Mat_SeqBAIJ *Aloc;
1522   Mat        B;
1523   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1524   int        bs=baij->bs,mbs=baij->mbs;
1525   Scalar     *a;
1526 
1527   PetscFunctionBegin;
1528   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1529   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1530   CHKERRQ(ierr);
1531 
1532   /* copy over the A part */
1533   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1534   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1535   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1536 
1537   for ( i=0; i<mbs; i++ ) {
1538     rvals[0] = bs*(baij->rstart + i);
1539     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1540     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1541       col = (baij->cstart+aj[j])*bs;
1542       for (k=0; k<bs; k++ ) {
1543         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1544         col++; a += bs;
1545       }
1546     }
1547   }
1548   /* copy over the B part */
1549   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1550   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1551   for ( i=0; i<mbs; i++ ) {
1552     rvals[0] = bs*(baij->rstart + i);
1553     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1554     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1555       col = baij->garray[aj[j]]*bs;
1556       for (k=0; k<bs; k++ ) {
1557         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1558         col++; a += bs;
1559       }
1560     }
1561   }
1562   PetscFree(rvals);
1563   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1564   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1565 
1566   if (matout != PETSC_NULL) {
1567     *matout = B;
1568   } else {
1569     PetscOps       *Abops;
1570     struct _MatOps *Aops;
1571 
1572     /* This isn't really an in-place transpose .... but free data structures from baij */
1573     PetscFree(baij->rowners);
1574     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1575     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1576     if (baij->colmap) PetscFree(baij->colmap);
1577     if (baij->garray) PetscFree(baij->garray);
1578     if (baij->lvec) VecDestroy(baij->lvec);
1579     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1580     PetscFree(baij);
1581 
1582     /*
1583        This is horrible, horrible code. We need to keep the
1584       A pointers for the bops and ops but copy everything
1585       else from C.
1586     */
1587     Abops = A->bops;
1588     Aops  = A->ops;
1589     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1590     A->bops = Abops;
1591     A->ops  = Aops;
1592 
1593     PetscHeaderDestroy(B);
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #undef __FUNC__
1599 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1600 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1601 {
1602   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1603   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1604   int ierr,s1,s2,s3;
1605 
1606   PetscFunctionBegin;
1607   if (ll)  {
1608     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1609     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1610     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1611     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1612     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1613   }
1614   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1615   PetscFunctionReturn(0);
1616 }
1617 
1618 #undef __FUNC__
1619 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1620 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1621 {
1622   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1623   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1624   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1625   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1626   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1627   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1628   MPI_Comm       comm = A->comm;
1629   MPI_Request    *send_waits,*recv_waits;
1630   MPI_Status     recv_status,*send_status;
1631   IS             istmp;
1632   PetscTruth     localdiag;
1633 
1634   PetscFunctionBegin;
1635   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1636   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1637 
1638   /*  first count number of contributors to each processor */
1639   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1640   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1641   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1642   for ( i=0; i<N; i++ ) {
1643     idx = rows[i];
1644     found = 0;
1645     for ( j=0; j<size; j++ ) {
1646       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1647         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1648       }
1649     }
1650     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1651   }
1652   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1653 
1654   /* inform other processors of number of messages and max length*/
1655   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1656   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1657   nrecvs = work[rank];
1658   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1659   nmax   = work[rank];
1660   PetscFree(work);
1661 
1662   /* post receives:   */
1663   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1664   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1665   for ( i=0; i<nrecvs; i++ ) {
1666     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1667   }
1668 
1669   /* do sends:
1670      1) starts[i] gives the starting index in svalues for stuff going to
1671      the ith processor
1672   */
1673   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1674   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1675   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1676   starts[0] = 0;
1677   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1678   for ( i=0; i<N; i++ ) {
1679     svalues[starts[owner[i]]++] = rows[i];
1680   }
1681   ISRestoreIndices(is,&rows);
1682 
1683   starts[0] = 0;
1684   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1685   count = 0;
1686   for ( i=0; i<size; i++ ) {
1687     if (procs[i]) {
1688       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1689     }
1690   }
1691   PetscFree(starts);
1692 
1693   base = owners[rank]*bs;
1694 
1695   /*  wait on receives */
1696   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1697   source = lens + nrecvs;
1698   count  = nrecvs; slen = 0;
1699   while (count) {
1700     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1701     /* unpack receives into our local space */
1702     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1703     source[imdex]  = recv_status.MPI_SOURCE;
1704     lens[imdex]  = n;
1705     slen += n;
1706     count--;
1707   }
1708   PetscFree(recv_waits);
1709 
1710   /* move the data into the send scatter */
1711   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1712   count = 0;
1713   for ( i=0; i<nrecvs; i++ ) {
1714     values = rvalues + i*nmax;
1715     for ( j=0; j<lens[i]; j++ ) {
1716       lrows[count++] = values[j] - base;
1717     }
1718   }
1719   PetscFree(rvalues); PetscFree(lens);
1720   PetscFree(owner); PetscFree(nprocs);
1721 
1722   /* actually zap the local rows */
1723   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1724   PLogObjectParent(A,istmp);
1725 
1726   /*
1727         Zero the required rows. If the "diagonal block" of the matrix
1728      is square and the user wishes to set the diagonal we use seperate
1729      code so that MatSetValues() is not called for each diagonal allocating
1730      new memory, thus calling lots of mallocs and slowing things down.
1731 
1732        Contributed by: Mathew Knepley
1733   */
1734   localdiag = PETSC_FALSE;
1735   if (diag && (l->A->M == l->A->N)) {
1736     localdiag = PETSC_TRUE;
1737     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1738   } else {
1739     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1740   }
1741   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1742   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1743 
1744   if (diag && (localdiag == PETSC_FALSE)) {
1745     for ( i = 0; i < slen; i++ ) {
1746       row = lrows[i] + rstart_bs;
1747       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1748     }
1749     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1750     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1751   }
1752   PetscFree(lrows);
1753 
1754   /* wait on sends */
1755   if (nsends) {
1756     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1757     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1758     PetscFree(send_status);
1759   }
1760   PetscFree(send_waits); PetscFree(svalues);
1761 
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 extern int MatPrintHelp_SeqBAIJ(Mat);
1766 #undef __FUNC__
1767 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1768 int MatPrintHelp_MPIBAIJ(Mat A)
1769 {
1770   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1771   MPI_Comm    comm = A->comm;
1772   static int  called = 0;
1773   int         ierr;
1774 
1775   PetscFunctionBegin;
1776   if (!a->rank) {
1777     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1778   }
1779   if (called) {PetscFunctionReturn(0);} else called = 1;
1780   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1781   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 #undef __FUNC__
1786 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1787 int MatSetUnfactored_MPIBAIJ(Mat A)
1788 {
1789   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1790   int         ierr;
1791 
1792   PetscFunctionBegin;
1793   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
1798 
1799 /* -------------------------------------------------------------------*/
1800 static struct _MatOps MatOps = {
1801   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
1802   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
1803   0,0,0,0,
1804   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
1805   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
1806   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
1807   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
1808   0,0,0,MatGetSize_MPIBAIJ,
1809   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1810   0,0,MatConvertSameType_MPIBAIJ,0,0,
1811   0,0,0,MatGetSubMatrices_MPIBAIJ,
1812   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1813   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1814   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
1815 
1816 
1817 #undef __FUNC__
1818 #define __FUNC__ "MatCreateMPIBAIJ"
1819 /*@C
1820    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1821    (block compressed row).  For good matrix assembly performance
1822    the user should preallocate the matrix storage by setting the parameters
1823    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1824    performance can be increased by more than a factor of 50.
1825 
1826    Collective on MPI_Comm
1827 
1828    Input Parameters:
1829 +  comm - MPI communicator
1830 .  bs   - size of blockk
1831 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1832            This value should be the same as the local size used in creating the
1833            y vector for the matrix-vector product y = Ax.
1834 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1835            This value should be the same as the local size used in creating the
1836            x vector for the matrix-vector product y = Ax.
1837 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1838 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1839 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1840            submatrix  (same for all local rows)
1841 .  d_nzz - array containing the number of block nonzeros in the various block rows
1842            of the in diagonal portion of the local (possibly different for each block
1843            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1844 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1845            submatrix (same for all local rows).
1846 -  o_nzz - array containing the number of nonzeros in the various block rows of the
1847            off-diagonal portion of the local submatrix (possibly different for
1848            each block row) or PETSC_NULL.
1849 
1850    Output Parameter:
1851 .  A - the matrix
1852 
1853    Options Database Keys:
1854 .   -mat_no_unroll - uses code that does not unroll the loops in the
1855                      block calculations (much slower)
1856 .   -mat_block_size - size of the blocks to use
1857 
1858    Notes:
1859    The user MUST specify either the local or global matrix dimensions
1860    (possibly both).
1861 
1862    Storage Information:
1863    For a square global matrix we define each processor's diagonal portion
1864    to be its local rows and the corresponding columns (a square submatrix);
1865    each processor's off-diagonal portion encompasses the remainder of the
1866    local matrix (a rectangular submatrix).
1867 
1868    The user can specify preallocated storage for the diagonal part of
1869    the local submatrix with either d_nz or d_nnz (not both).  Set
1870    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1871    memory allocation.  Likewise, specify preallocated storage for the
1872    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1873 
1874    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1875    the figure below we depict these three local rows and all columns (0-11).
1876 
1877 .vb
1878            0 1 2 3 4 5 6 7 8 9 10 11
1879           -------------------
1880    row 3  |  o o o d d d o o o o o o
1881    row 4  |  o o o d d d o o o o o o
1882    row 5  |  o o o d d d o o o o o o
1883           -------------------
1884 .ve
1885 
1886    Thus, any entries in the d locations are stored in the d (diagonal)
1887    submatrix, and any entries in the o locations are stored in the
1888    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1889    stored simply in the MATSEQBAIJ format for compressed row storage.
1890 
1891    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1892    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1893    In general, for PDE problems in which most nonzeros are near the diagonal,
1894    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1895    or you will get TERRIBLE performance; see the users' manual chapter on
1896    matrices.
1897 
1898 .keywords: matrix, block, aij, compressed row, sparse, parallel
1899 
1900 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1901 @*/
1902 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1903                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1904 {
1905   Mat          B;
1906   Mat_MPIBAIJ  *b;
1907   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1908 
1909   PetscFunctionBegin;
1910   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1911 
1912   MPI_Comm_size(comm,&size);
1913   if (size == 1) {
1914     if (M == PETSC_DECIDE) M = m;
1915     if (N == PETSC_DECIDE) N = n;
1916     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1917     PetscFunctionReturn(0);
1918   }
1919 
1920   *A = 0;
1921   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
1922   PLogObjectCreate(B);
1923   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
1924   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1925   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1926 
1927   B->ops->destroy    = MatDestroy_MPIBAIJ;
1928   B->ops->view       = MatView_MPIBAIJ;
1929   B->mapping    = 0;
1930   B->factor     = 0;
1931   B->assembled  = PETSC_FALSE;
1932 
1933   B->insertmode = NOT_SET_VALUES;
1934   MPI_Comm_rank(comm,&b->rank);
1935   MPI_Comm_size(comm,&b->size);
1936 
1937   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1938     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1939   }
1940   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1941     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1942   }
1943   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1944     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1945   }
1946   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1947   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
1948 
1949   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1950     work[0] = m; work[1] = n;
1951     mbs = m/bs; nbs = n/bs;
1952     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1953     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
1954     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
1955   }
1956   if (m == PETSC_DECIDE) {
1957     Mbs = M/bs;
1958     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
1959     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
1960     m   = mbs*bs;
1961   }
1962   if (n == PETSC_DECIDE) {
1963     Nbs = N/bs;
1964     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
1965     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
1966     n   = nbs*bs;
1967   }
1968   if (mbs*bs != m || nbs*bs != n) {
1969     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1970   }
1971 
1972   b->m = m; B->m = m;
1973   b->n = n; B->n = n;
1974   b->N = N; B->N = N;
1975   b->M = M; B->M = M;
1976   b->bs  = bs;
1977   b->bs2 = bs*bs;
1978   b->mbs = mbs;
1979   b->nbs = nbs;
1980   b->Mbs = Mbs;
1981   b->Nbs = Nbs;
1982 
1983   /* build local table of row and column ownerships */
1984   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1985   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1986   b->cowners = b->rowners + b->size + 2;
1987   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1988   b->rowners[0] = 0;
1989   for ( i=2; i<=b->size; i++ ) {
1990     b->rowners[i] += b->rowners[i-1];
1991   }
1992   b->rstart    = b->rowners[b->rank];
1993   b->rend      = b->rowners[b->rank+1];
1994   b->rstart_bs = b->rstart * bs;
1995   b->rend_bs   = b->rend * bs;
1996 
1997   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1998   b->cowners[0] = 0;
1999   for ( i=2; i<=b->size; i++ ) {
2000     b->cowners[i] += b->cowners[i-1];
2001   }
2002   b->cstart    = b->cowners[b->rank];
2003   b->cend      = b->cowners[b->rank+1];
2004   b->cstart_bs = b->cstart * bs;
2005   b->cend_bs   = b->cend * bs;
2006 
2007 
2008   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2009   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
2010   PLogObjectParent(B,b->A);
2011   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2012   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
2013   PLogObjectParent(B,b->B);
2014 
2015   /* build cache for off array entries formed */
2016   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
2017   b->donotstash  = 0;
2018   b->colmap      = 0;
2019   b->garray      = 0;
2020   b->roworiented = 1;
2021 
2022   /* stuff used in block assembly */
2023   b->barray       = 0;
2024 
2025   /* stuff used for matrix vector multiply */
2026   b->lvec         = 0;
2027   b->Mvctx        = 0;
2028 
2029   /* stuff for MatGetRow() */
2030   b->rowindices   = 0;
2031   b->rowvalues    = 0;
2032   b->getrowactive = PETSC_FALSE;
2033 
2034   /* hash table stuff */
2035   b->ht           = 0;
2036   b->hd           = 0;
2037   b->ht_size      = 0;
2038   b->ht_flag      = 0;
2039   b->ht_fact      = 0;
2040   b->ht_total_ct  = 0;
2041   b->ht_insert_ct = 0;
2042 
2043   *A = B;
2044   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2045   if (flg) {
2046     double fact = 1.39;
2047     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2048     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2049     if (fact <= 1.0) fact = 1.39;
2050     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2051     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2052   }
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNC__
2057 #define __FUNC__ "MatConvertSameType_MPIBAIJ"
2058 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
2059 {
2060   Mat         mat;
2061   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2062   int         ierr, len=0, flg;
2063 
2064   PetscFunctionBegin;
2065   *newmat       = 0;
2066   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
2067   PLogObjectCreate(mat);
2068   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2069   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
2070   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2071   mat->ops->view       = MatView_MPIBAIJ;
2072   mat->factor     = matin->factor;
2073   mat->assembled  = PETSC_TRUE;
2074 
2075   a->m = mat->m   = oldmat->m;
2076   a->n = mat->n   = oldmat->n;
2077   a->M = mat->M   = oldmat->M;
2078   a->N = mat->N   = oldmat->N;
2079 
2080   a->bs  = oldmat->bs;
2081   a->bs2 = oldmat->bs2;
2082   a->mbs = oldmat->mbs;
2083   a->nbs = oldmat->nbs;
2084   a->Mbs = oldmat->Mbs;
2085   a->Nbs = oldmat->Nbs;
2086 
2087   a->rstart       = oldmat->rstart;
2088   a->rend         = oldmat->rend;
2089   a->cstart       = oldmat->cstart;
2090   a->cend         = oldmat->cend;
2091   a->size         = oldmat->size;
2092   a->rank         = oldmat->rank;
2093   mat->insertmode = NOT_SET_VALUES;
2094   a->rowvalues    = 0;
2095   a->getrowactive = PETSC_FALSE;
2096   a->barray       = 0;
2097 
2098   /* hash table stuff */
2099   a->ht           = 0;
2100   a->hd           = 0;
2101   a->ht_size      = 0;
2102   a->ht_flag      = oldmat->ht_flag;
2103   a->ht_fact      = oldmat->ht_fact;
2104   a->ht_total_ct  = 0;
2105   a->ht_insert_ct = 0;
2106 
2107 
2108   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2109   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2110   a->cowners = a->rowners + a->size + 2;
2111   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2112   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
2113   if (oldmat->colmap) {
2114     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2115     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2116     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2117   } else a->colmap = 0;
2118   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2119     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2120     PLogObjectMemory(mat,len*sizeof(int));
2121     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2122   } else a->garray = 0;
2123 
2124   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2125   PLogObjectParent(mat,a->lvec);
2126   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2127   PLogObjectParent(mat,a->Mvctx);
2128   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
2129   PLogObjectParent(mat,a->A);
2130   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
2131   PLogObjectParent(mat,a->B);
2132   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2133   if (flg) {
2134     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2135   }
2136   *newmat = mat;
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #include "sys.h"
2141 
2142 #undef __FUNC__
2143 #define __FUNC__ "MatLoad_MPIBAIJ"
2144 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2145 {
2146   Mat          A;
2147   int          i, nz, ierr, j,rstart, rend, fd;
2148   Scalar       *vals,*buf;
2149   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2150   MPI_Status   status;
2151   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2152   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2153   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2154   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2155   int          dcount,kmax,k,nzcount,tmp;
2156 
2157   PetscFunctionBegin;
2158   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2159 
2160   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2161   if (!rank) {
2162     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2163     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2164     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2165     if (header[3] < 0) {
2166       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2167     }
2168   }
2169 
2170   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2171   M = header[1]; N = header[2];
2172 
2173   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2174 
2175   /*
2176      This code adds extra rows to make sure the number of rows is
2177      divisible by the blocksize
2178   */
2179   Mbs        = M/bs;
2180   extra_rows = bs - M + bs*(Mbs);
2181   if (extra_rows == bs) extra_rows = 0;
2182   else                  Mbs++;
2183   if (extra_rows &&!rank) {
2184     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2185   }
2186 
2187   /* determine ownership of all rows */
2188   mbs = Mbs/size + ((Mbs % size) > rank);
2189   m   = mbs * bs;
2190   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2191   browners = rowners + size + 1;
2192   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2193   rowners[0] = 0;
2194   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2195   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2196   rstart = rowners[rank];
2197   rend   = rowners[rank+1];
2198 
2199   /* distribute row lengths to all processors */
2200   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
2201   if (!rank) {
2202     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2203     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2204     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2205     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2206     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2207     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2208     PetscFree(sndcounts);
2209   } else {
2210     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2211   }
2212 
2213   if (!rank) {
2214     /* calculate the number of nonzeros on each processor */
2215     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2216     PetscMemzero(procsnz,size*sizeof(int));
2217     for ( i=0; i<size; i++ ) {
2218       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2219         procsnz[i] += rowlengths[j];
2220       }
2221     }
2222     PetscFree(rowlengths);
2223 
2224     /* determine max buffer needed and allocate it */
2225     maxnz = 0;
2226     for ( i=0; i<size; i++ ) {
2227       maxnz = PetscMax(maxnz,procsnz[i]);
2228     }
2229     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2230 
2231     /* read in my part of the matrix column indices  */
2232     nz = procsnz[0];
2233     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2234     mycols = ibuf;
2235     if (size == 1)  nz -= extra_rows;
2236     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2237     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2238 
2239     /* read in every ones (except the last) and ship off */
2240     for ( i=1; i<size-1; i++ ) {
2241       nz   = procsnz[i];
2242       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2243       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2244     }
2245     /* read in the stuff for the last proc */
2246     if ( size != 1 ) {
2247       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2248       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2249       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2250       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2251     }
2252     PetscFree(cols);
2253   } else {
2254     /* determine buffer space needed for message */
2255     nz = 0;
2256     for ( i=0; i<m; i++ ) {
2257       nz += locrowlens[i];
2258     }
2259     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2260     mycols = ibuf;
2261     /* receive message of column indices*/
2262     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2263     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2264     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2265   }
2266 
2267   /* loop over local rows, determining number of off diagonal entries */
2268   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2269   odlens = dlens + (rend-rstart);
2270   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2271   PetscMemzero(mask,3*Mbs*sizeof(int));
2272   masked1 = mask    + Mbs;
2273   masked2 = masked1 + Mbs;
2274   rowcount = 0; nzcount = 0;
2275   for ( i=0; i<mbs; i++ ) {
2276     dcount  = 0;
2277     odcount = 0;
2278     for ( j=0; j<bs; j++ ) {
2279       kmax = locrowlens[rowcount];
2280       for ( k=0; k<kmax; k++ ) {
2281         tmp = mycols[nzcount++]/bs;
2282         if (!mask[tmp]) {
2283           mask[tmp] = 1;
2284           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2285           else masked1[dcount++] = tmp;
2286         }
2287       }
2288       rowcount++;
2289     }
2290 
2291     dlens[i]  = dcount;
2292     odlens[i] = odcount;
2293 
2294     /* zero out the mask elements we set */
2295     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2296     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2297   }
2298 
2299   /* create our matrix */
2300   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2301          CHKERRQ(ierr);
2302   A = *newmat;
2303   MatSetOption(A,MAT_COLUMNS_SORTED);
2304 
2305   if (!rank) {
2306     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
2307     /* read in my part of the matrix numerical values  */
2308     nz = procsnz[0];
2309     vals = buf;
2310     mycols = ibuf;
2311     if (size == 1)  nz -= extra_rows;
2312     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2313     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2314 
2315     /* insert into matrix */
2316     jj      = rstart*bs;
2317     for ( i=0; i<m; i++ ) {
2318       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2319       mycols += locrowlens[i];
2320       vals   += locrowlens[i];
2321       jj++;
2322     }
2323     /* read in other processors (except the last one) and ship out */
2324     for ( i=1; i<size-1; i++ ) {
2325       nz   = procsnz[i];
2326       vals = buf;
2327       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2328       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2329     }
2330     /* the last proc */
2331     if ( size != 1 ){
2332       nz   = procsnz[i] - extra_rows;
2333       vals = buf;
2334       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2335       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2336       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2337     }
2338     PetscFree(procsnz);
2339   } else {
2340     /* receive numeric values */
2341     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
2342 
2343     /* receive message of values*/
2344     vals   = buf;
2345     mycols = ibuf;
2346     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2347     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2348     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2349 
2350     /* insert into matrix */
2351     jj      = rstart*bs;
2352     for ( i=0; i<m; i++ ) {
2353       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2354       mycols += locrowlens[i];
2355       vals   += locrowlens[i];
2356       jj++;
2357     }
2358   }
2359   PetscFree(locrowlens);
2360   PetscFree(buf);
2361   PetscFree(ibuf);
2362   PetscFree(rowners);
2363   PetscFree(dlens);
2364   PetscFree(mask);
2365   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2366   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 
2371 
2372 #undef __FUNC__
2373 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2374 /*@
2375    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2376 
2377    Input Parameters:
2378 .  mat  - the matrix
2379 .  fact - factor
2380 
2381    Collective on Mat
2382 
2383   Notes:
2384    This can also be set by the command line option: -mat_use_hash_table fact
2385 
2386 .keywords: matrix, hashtable, factor, HT
2387 
2388 .seealso: MatSetOption()
2389 @*/
2390 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2391 {
2392   Mat_MPIBAIJ *baij;
2393 
2394   PetscFunctionBegin;
2395   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2396   if (mat->type != MATMPIBAIJ) {
2397       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2398   }
2399   baij = (Mat_MPIBAIJ*) mat->data;
2400   baij->ht_fact = fact;
2401   PetscFunctionReturn(0);
2402 }
2403