xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e20fef11c1a1457fc77d0de77d14a2dd6b3370e4)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.125 1998/05/29 18:52:46 bsmith Exp balay $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 
15 /*
16      Local utility routine that creates a mapping from the global column
17    number to the local number in the off-diagonal part of the local
18    storage of the matrix.  This is done in a non-scalable way since the
19    length of colmap equals the global matrix length; see the sketch after this routine.
20 */
21 #undef __FUNC__
22 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
23 static int CreateColmap_MPIBAIJ_Private(Mat mat)
24 {
25   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
26   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
27   int         nbs = B->nbs,i,bs=B->bs;
28 
29   PetscFunctionBegin;
30   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
31   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
32   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
33   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
34   PetscFunctionReturn(0);
35 }
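/*
   Illustrative sketch (not part of the library): the colmap convention set up
   above.  colmap is indexed by global block column; a value of 0 means that
   block column has no entries in the off-diagonal part B, and a nonzero value
   v means its first local (point) column in B is v-1.  For example, assuming
   garray = {3,7} and bs = 2:

       colmap[3] = 0*bs + 1 = 1     ->  point columns 0..1 of B
       colmap[7] = 1*bs + 1 = 3     ->  point columns 2..3 of B
       colmap[k] = 0 otherwise      ->  block column k not stored locally

   so a global point column gcol translates to a local column roughly as

       bcol = gcol/bs;
       lcol = baij->colmap[bcol] ? baij->colmap[bcol] - 1 + gcol%bs : -1;

   which is the "colmap[in[j]/bs] - 1 + in[j]%bs" lookup used in
   MatSetValues_MPIBAIJ below.
*/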
36 
37 #define CHUNKSIZE  10
38 
39 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
40 { \
41  \
42     brow = row/bs;  \
43     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
44     rmax = aimax[brow]; nrow = ailen[brow]; \
45       bcol = col/bs; \
46       ridx = row % bs; cidx = col % bs; \
47       low = 0; high = nrow; \
48       while (high-low > 3) { \
49         t = (low+high)/2; \
50         if (rp[t] > bcol) high = t; \
51         else              low  = t; \
52       } \
53       for ( _i=low; _i<high; _i++ ) { \
54         if (rp[_i] > bcol) break; \
55         if (rp[_i] == bcol) { \
56           bap  = ap +  bs2*_i + bs*cidx + ridx; \
57           if (addv == ADD_VALUES) *bap += value;  \
58           else                    *bap  = value;  \
59           goto a_noinsert; \
60         } \
61       } \
62       if (a->nonew == 1) goto a_noinsert; \
63       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
64       if (nrow >= rmax) { \
65         /* there is no extra room in row, therefore enlarge */ \
66         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
67         Scalar *new_a; \
68  \
69         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
70  \
71         /* malloc new storage space */ \
72         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
73         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
74         new_j   = (int *) (new_a + bs2*new_nz); \
75         new_i   = new_j + new_nz; \
76  \
77         /* copy over old data into new slots */ \
78         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
79         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
80         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
81         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
82         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
83                                                            len*sizeof(int)); \
84         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
85         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
86         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
87                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
88         /* free up old matrix storage */ \
89         PetscFree(a->a);  \
90         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
91         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
92         a->singlemalloc = 1; \
93  \
94         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
96         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
97         a->maxnz += bs2*CHUNKSIZE; \
98         a->reallocs++; \
99         a->nz++; \
100       } \
101       N = nrow++ - 1;  \
102       /* shift up all the later entries in this row */ \
103       for ( ii=N; ii>=_i; ii-- ) { \
104         rp[ii+1] = rp[ii]; \
105         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
106       } \
107       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
108       rp[_i]                      = bcol;  \
109       ap[bs2*_i + bs*cidx + ridx] = value;  \
110       a_noinsert:; \
111     ailen[brow] = nrow; \
112 }
113 
114 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
115 { \
116  \
117     brow = row/bs;  \
118     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
119     rmax = bimax[brow]; nrow = bilen[brow]; \
120       bcol = col/bs; \
121       ridx = row % bs; cidx = col % bs; \
122       low = 0; high = nrow; \
123       while (high-low > 3) { \
124         t = (low+high)/2; \
125         if (rp[t] > bcol) high = t; \
126         else              low  = t; \
127       } \
128       for ( _i=low; _i<high; _i++ ) { \
129         if (rp[_i] > bcol) break; \
130         if (rp[_i] == bcol) { \
131           bap  = ap +  bs2*_i + bs*cidx + ridx; \
132           if (addv == ADD_VALUES) *bap += value;  \
133           else                    *bap  = value;  \
134           goto b_noinsert; \
135         } \
136       } \
137       if (b->nonew == 1) goto b_noinsert; \
138       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
139       if (nrow >= rmax) { \
140         /* there is no extra room in row, therefore enlarge */ \
141         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
142         Scalar *new_a; \
143  \
144         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
145  \
146         /* malloc new storage space */ \
147         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
148         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
149         new_j   = (int *) (new_a + bs2*new_nz); \
150         new_i   = new_j + new_nz; \
151  \
152         /* copy over old data into new slots */ \
153         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
154         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
155         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
156         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
157         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
158                                                            len*sizeof(int)); \
159         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
160         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
161         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
162                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
163         /* free up old matrix storage */ \
164         PetscFree(b->a);  \
165         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
166         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
167         b->singlemalloc = 1; \
168  \
169         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
170         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
171         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
172         b->maxnz += bs2*CHUNKSIZE; \
173         b->reallocs++; \
174         b->nz++; \
175       } \
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for ( ii=N; ii>=_i; ii-- ) { \
179         rp[ii+1] = rp[ii]; \
180         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
181       } \
182       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
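/*
   Sketch (for exposition only) of the strategy shared by the two insertion
   macros above.  Within a block row the block-column indices rp[0..nrow-1] are
   kept sorted, so the macros narrow the search range by bisection and finish
   with a short linear scan:

       low = 0; high = nrow;
       while (high - low > 3) {               -- bisect until the range is short
         t = (low + high)/2;
         if (rp[t] > bcol) high = t; else low = t;
       }
       for (_i = low; _i < high; _i++) {
         if (rp[_i] >  bcol) break;           -- passed the spot: block is absent
         if (rp[_i] == bcol) break;           -- block already present: update in place
       }

   If the block is absent and the row is full (nrow >= rmax), the macros
   reallocate the nonzero storage with CHUNKSIZE extra block slots for this row,
   then shift the trailing blocks up before inserting the new bs-by-bs block.
*/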
188 
189 #undef __FUNC__
190 #define __FUNC__ "MatSetValues_MPIBAIJ"
191 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
192 {
193   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
194   Scalar      value;
195   int         ierr,i,j,row,col;
196   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
197   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
198   int         cend_orig=baij->cend_bs,bs=baij->bs;
199 
200   /* Some Variables required in the macro */
201   Mat         A = baij->A;
202   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
203   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
204   Scalar      *aa=a->a;
205 
206   Mat         B = baij->B;
207   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
208   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
209   Scalar      *ba=b->a;
210 
211   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
212   int         low,high,t,ridx,cidx,bs2=a->bs2;
213   Scalar      *ap,*bap;
214 
215   PetscFunctionBegin;
216   for ( i=0; i<m; i++ ) {
217 #if defined(USE_PETSC_BOPT_g)
218     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
219     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
220 #endif
221     if (im[i] >= rstart_orig && im[i] < rend_orig) {
222       row = im[i] - rstart_orig;
223       for ( j=0; j<n; j++ ) {
224         if (in[j] >= cstart_orig && in[j] < cend_orig){
225           col = in[j] - cstart_orig;
226           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         }
230 #if defined(USE_PETSC_BOPT_g)
231         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
232         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
240             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
241               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
244               B = baij->B;
245               b = (Mat_SeqBAIJ *) (B)->data;
246               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
247               ba=b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
252           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
253         }
254       }
255     } else {
256       if (roworiented && !baij->donotstash) {
257         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
258       } else {
259         if (!baij->donotstash) {
260           row = im[i];
261           for ( j=0; j<n; j++ ) {
262             ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
263           }
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
272 #undef __FUNC__
273 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
274 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
275 {
276   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
277   Scalar      *value,*barray=baij->barray;
278   int         ierr,i,j,ii,jj,row,col,k,l;
279   int         roworiented = baij->roworiented,rstart=baij->rstart ;
280   int         rend=baij->rend,cstart=baij->cstart,stepval;
281   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
282   PetscFunctionBegin;
283   if (!barray) {
284     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
285   }
286 
287   if (roworiented) {
288     stepval = (n-1)*bs;
289   } else {
290     stepval = (m-1)*bs;
291   }
292   for ( i=0; i<m; i++ ) {
293 #if defined(USE_PETSC_BOPT_g)
294     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
295     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
296 #endif
297     if (im[i] >= rstart && im[i] < rend) {
298       row = im[i] - rstart;
299       for ( j=0; j<n; j++ ) {
300         /* If there is only one block column (row-oriented) or one block row (column-oriented), the input block is already contiguous and no copy is required */
301         if ((roworiented) && (n == 1)) {
302           barray = v + i*bs2;
303         } else if((!roworiented) && (m == 1)) {
304           barray = v + j*bs2;
305         } else { /* Here a copy is required */
306           if (roworiented) {
307             value = v + i*(stepval+bs)*bs + j*bs;
308           } else {
309             value = v + j*(stepval+bs)*bs + i*bs;
310           }
311           for ( ii=0; ii<bs; ii++,value+=stepval ) {
312             for (jj=0; jj<bs; jj++ ) {
313               *barray++  = *value++;
314             }
315           }
316           barray -=bs2;
317         }
318 
319         if (in[j] >= cstart && in[j] < cend){
320           col  = in[j] - cstart;
321           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
322         }
323 #if defined(USE_PETSC_BOPT_g)
324         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
325         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
326 #endif
327         else {
328           if (mat->was_assembled) {
329             if (!baij->colmap) {
330               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331             }
332 
333 #if defined(USE_PETSC_BOPT_g)
334             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
335 #endif
336             col = (baij->colmap[in[j]] - 1)/bs;
337             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
338               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
339               col =  in[j];
340             }
341           }
342           else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (!baij->donotstash) {
348         if (roworiented ) {
349           row   = im[i]*bs;
350           value = v + i*(stepval+bs)*bs;
351           for ( j=0; j<bs; j++,row++ ) {
352             for ( k=0; k<n; k++ ) {
353               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
354                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
355               }
356             }
357           }
358         } else {
359           for ( j=0; j<n; j++ ) {
360             value = v + j*(stepval+bs)*bs + i*bs;
361             col   = in[j]*bs;
362             for ( k=0; k<bs; k++,col++,value+=stepval) {
363               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
364                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
365               }
366             }
367           }
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 #include <math.h>
375 #define HASH_KEY 0.6180339887
376 /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
378 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
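/*
   The HASH macro above is multiplicative hashing with the golden-ratio
   constant: take the fractional part of key*HASH_KEY and scale it by the table
   size.  A small worked example (values approximate), assuming size = 97 and
   key = 1000:

       key*HASH_KEY                   = 618.0339887
       fractional part (tmp-(int)tmp) = 0.0339887
       bucket                         = (int)(97*0.0339887) = 3

   Collisions are handled by the linear-probing loops in the routines that use
   this macro.
*/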
379 #undef __FUNC__
380 #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
381 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
382 {
383   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
384   int         ierr,i,j,row,col;
385   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
386   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
387   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
388   double      tmp;
389   Scalar      ** HD = baij->hd,value;
390 #if defined(USE_PETSC_BOPT_g)
391   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
392 #endif
393 
394   PetscFunctionBegin;
395 
396   for ( i=0; i<m; i++ ) {
397 #if defined(USE_PETSC_BOPT_g)
398     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
399     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
400 #endif
401       row = im[i];
402     if (row >= rstart_orig && row < rend_orig) {
403       for ( j=0; j<n; j++ ) {
404         col = in[j];
405         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
406         /* Look up the (block row, block column) key in the hash table */
407         key = (row/bs)*Nbs+(col/bs)+1;
408         h1  = HASH(size,key,tmp);
409 
410 
411         idx = h1;
412 #if defined(USE_PETSC_BOPT_g)
413         insert_ct++;
414         total_ct++;
415         if (HT[idx] != key) {
416           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
417           if (idx == size) {
418             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
419             if (idx == h1) {
420               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
421             }
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
427           if (idx == size) {
428             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
429             if (idx == h1) {
430               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
431             }
432           }
433         }
434 #endif
435         /* A HASH table entry is found, so insert the values at the correct address */
436         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
438       }
439     } else {
440       if (roworiented && !baij->donotstash) {
441         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
442       } else {
443         if (!baij->donotstash) {
444           row = im[i];
445           for ( j=0; j<n; j++ ) {
446             ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
447           }
448         }
449       }
450     }
451   }
452 #if defined(USE_PETSC_BOPT_g)
453   baij->ht_total_ct = total_ct;
454   baij->ht_insert_ct = insert_ct;
455 #endif
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNC__
460 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
461 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
462 {
463   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
464   int         ierr,i,j,ii,jj,row,col,k,l;
465   int         roworiented = baij->roworiented,rstart=baij->rstart ;
466   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
467   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
468   double      tmp;
469   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
470 #if defined(USE_PETSC_BOPT_g)
471   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
472 #endif
473 
474   PetscFunctionBegin;
475 
476   if (roworiented) {
477     stepval = (n-1)*bs;
478   } else {
479     stepval = (m-1)*bs;
480   }
481   for ( i=0; i<m; i++ ) {
482 #if defined(USE_PETSC_BOPT_g)
483     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
484     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
485 #endif
486     row   = im[i];
487     v_t   = v + i*bs2;
488     if (row >= rstart && row < rend) {
489       for ( j=0; j<n; j++ ) {
490         col = in[j];
491 
492         /* Look up the (block row, block column) key in the hash table */
493         key = row*Nbs+col+1;
494         h1  = HASH(size,key,tmp);
495 
496         idx = h1;
497 #if defined(USE_PETSC_BOPT_g)
498         total_ct++;
499         insert_ct++;
500        if (HT[idx] != key) {
501           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
502           if (idx == size) {
503             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
504             if (idx == h1) {
505               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
506             }
507           }
508         }
509 #else
510         if (HT[idx] != key) {
511           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
512           if (idx == size) {
513             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
514             if (idx == h1) {
515               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
516             }
517           }
518         }
519 #endif
520         baij_a = HD[idx];
521         if (roworiented) {
522           /*value = v + i*(stepval+bs)*bs + j*bs;*/
523           /* value = v + (i*(stepval+bs)+j)*bs; */
524           value = v_t;
525           v_t  += bs;
526           if (addv == ADD_VALUES) {
527             for ( ii=0; ii<bs; ii++,value+=stepval) {
528               for ( jj=ii; jj<bs2; jj+=bs ) {
529                 baij_a[jj]  += *value++;
530               }
531             }
532           } else {
533             for ( ii=0; ii<bs; ii++,value+=stepval) {
534               for ( jj=ii; jj<bs2; jj+=bs ) {
535                 baij_a[jj]  = *value++;
536               }
537             }
538           }
539         } else {
540           value = v + j*(stepval+bs)*bs + i*bs;
541           if (addv == ADD_VALUES) {
542             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
543               for ( jj=0; jj<bs; jj++ ) {
544                 baij_a[jj]  += *value++;
545               }
546             }
547           } else {
548             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
549               for ( jj=0; jj<bs; jj++ ) {
550                 baij_a[jj]  = *value++;
551               }
552             }
553           }
554         }
555       }
556     } else {
557       if (!baij->donotstash) {
558         if (roworiented ) {
559           row   = im[i]*bs;
560           value = v + i*(stepval+bs)*bs;
561           for ( j=0; j<bs; j++,row++ ) {
562             for ( k=0; k<n; k++ ) {
563               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
564                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
565               }
566             }
567           }
568         } else {
569           for ( j=0; j<n; j++ ) {
570             value = v + j*(stepval+bs)*bs + i*bs;
571             col   = in[j]*bs;
572             for ( k=0; k<bs; k++,col++,value+=stepval) {
573               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
574                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
575               }
576             }
577           }
578         }
579       }
580     }
581   }
582 #if defined(USE_PETSC_BOPT_g)
583   baij->ht_total_ct = total_ct;
584   baij->ht_insert_ct = insert_ct;
585 #endif
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNC__
590 #define __FUNC__ "MatGetValues_MPIBAIJ"
591 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
592 {
593   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
594   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
595   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
596 
597   PetscFunctionBegin;
598   for ( i=0; i<m; i++ ) {
599     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
600     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
601     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
602       row = idxm[i] - bsrstart;
603       for ( j=0; j<n; j++ ) {
604         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
605         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
606         if (idxn[j] >= bscstart && idxn[j] < bscend){
607           col = idxn[j] - bscstart;
608           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
609         } else {
610           if (!baij->colmap) {
611             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
612           }
613           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
614              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
615           else {
616             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
617             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
618           }
619         }
620       }
621     } else {
622       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
623     }
624   }
625  PetscFunctionReturn(0);
626 }
627 
628 #undef __FUNC__
629 #define __FUNC__ "MatNorm_MPIBAIJ"
630 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
631 {
632   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
633   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
634   int        ierr, i,bs2=baij->bs2;
635   double     sum = 0.0;
636   Scalar     *v;
637 
638   PetscFunctionBegin;
639   if (baij->size == 1) {
640     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
641   } else {
642     if (type == NORM_FROBENIUS) {
643       v = amat->a;
644       for (i=0; i<amat->nz*bs2; i++ ) {
645 #if defined(USE_PETSC_COMPLEX)
646         sum += PetscReal(PetscConj(*v)*(*v)); v++;
647 #else
648         sum += (*v)*(*v); v++;
649 #endif
650       }
651       v = bmat->a;
652       for (i=0; i<bmat->nz*bs2; i++ ) {
653 #if defined(USE_PETSC_COMPLEX)
654         sum += PetscReal(PetscConj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
660       *norm = sqrt(*norm);
661     } else {
662       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
663     }
664   }
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNC__
669 #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
670 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
673   MPI_Comm    comm = mat->comm;
674   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
675   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
676   MPI_Request *send_waits,*recv_waits;
677   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
678   InsertMode  addv;
679   Scalar      *rvalues,*svalues;
680 
681   PetscFunctionBegin;
682   if (baij->donotstash) {
683     baij->svalues    = 0; baij->rvalues    = 0;
684     baij->nsends     = 0; baij->nrecvs     = 0;
685     baij->send_waits = 0; baij->recv_waits = 0;
686     baij->rmax       = 0;
687     PetscFunctionReturn(0);
688   }
689 
690   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
691   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
692   if (addv == (ADD_VALUES|INSERT_VALUES)) {
693     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
694   }
695   mat->insertmode = addv; /* in case this processor had no cache */
696 
697   /*  first count number of contributors to each processor */
698   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
699   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
700   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
701   for ( i=0; i<baij->stash.n; i++ ) {
702     idx = baij->stash.idx[i];
703     for ( j=0; j<size; j++ ) {
704       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
705         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
706       }
707     }
708   }
709   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
710 
711   /* inform other processors of number of messages and max length*/
712   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
713   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
714   nreceives = work[rank];
715   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
716   nmax      = work[rank];
717   PetscFree(work);
718 
719   /* post receives:
720        1) each message consists of ordered triples
721      (global row index, global column index, value); the indices are stored
722      as Scalars to simplify the message passing.
723        2) since we don't know how long each individual message is we
724      allocate the largest needed buffer for each receive. Potentially
725      this is a lot of wasted space.
726 
727 
728        This could be done better.
729   */
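/*
   Layout sketch (illustration only): with the triple encoding described above,
   the portion of svalues destined for one process looks like

       [ row0, col0, val0, row1, col1, val1, ... ]

   i.e. 3*nprocs[i] Scalars per message, which is why the receives and sends
   below use counts of 3*nmax and 3*nprocs[i] of type MPIU_SCALAR.
*/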
730   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
731   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
732   for ( i=0; i<nreceives; i++ ) {
733     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
734   }
735 
736   /* do sends:
737       1) starts[i] gives the starting index in svalues for stuff going to
738          the ith processor
739   */
740   svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
741   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
742   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
743   starts[0] = 0;
744   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
745   for ( i=0; i<baij->stash.n; i++ ) {
746     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
747     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
748     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
749   }
750   PetscFree(owner);
751   starts[0] = 0;
752   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
753   count = 0;
754   for ( i=0; i<size; i++ ) {
755     if (procs[i]) {
756       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
757     }
758   }
759   PetscFree(starts); PetscFree(nprocs);
760 
761   /* Free cache space */
762   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
763   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
764 
765   baij->svalues    = svalues;    baij->rvalues    = rvalues;
766   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
767   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
768   baij->rmax       = nmax;
769 
770   PetscFunctionReturn(0);
771 }
772 
773 /*
774   Creates the hash table and fills in its entries.
775   This table is created only once.
776   If new entries need to be added to the matrix
777   then the hash table has to be destroyed and
778   recreated.
779 */
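/*
   For exposition only: a generic open-addressing lookup over this table.  Keys
   are row*Nbs + col + 1 (block indices), so 0 can serve as the empty-slot
   marker, and collisions are resolved by linear probing with wraparound.  The
   actual loops live in MatSetValues_MPIBAIJ_HT above (search for an existing
   key) and in the setup code below (search for an empty slot to fill):

       h1 = HASH(size,key,tmp);
       for (k=0; k<size; k++) {
         idx = (h1 + k) % size;
         if (HT[idx] == key) break;     -- found; HD[idx] points at the bs2 block
         if (HT[idx] == 0)   break;     -- empty slot; key is not in the table
       }
*/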
780 #undef __FUNC__
781 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
782 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
783 {
784   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
785   Mat         A = baij->A, B=baij->B;
786   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
787   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
788   int         size,bs2=baij->bs2,rstart=baij->rstart;
789   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
790   int         *HT,key;
791   Scalar      **HD;
792   double      tmp;
793 #if defined(USE_PETSC_BOPT_g)
794   int         ct=0,max=0;
795 #endif
796 
797   PetscFunctionBegin;
798   baij->ht_size=(int)(factor*nz);
799   size = baij->ht_size;
800 
801   if (baij->ht) {
802     PetscFunctionReturn(0);
803   }
804 
805   /* Allocate Memory for Hash Table */
806   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
807   baij->ht = (int*)(baij->hd + size);
808   HD = baij->hd;
809   HT = baij->ht;
810 
811 
812   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
813 
814 
815   /* Loop Over A */
816   for ( i=0; i<a->mbs; i++ ) {
817     for ( j=ai[i]; j<ai[i+1]; j++ ) {
818       row = i+rstart;
819       col = aj[j]+cstart;
820 
821       key = row*Nbs + col + 1;
822       h1  = HASH(size,key,tmp);
823       for ( k=0; k<size; k++ ){
824         if (HT[(h1+k)%size] == 0) {
825           HT[(h1+k)%size] = key;
826           HD[(h1+k)%size] = a->a + j*bs2;
827           break;
828 #if defined(USE_PETSC_BOPT_g)
829         } else {
830           ct++;
831 #endif
832         }
833       }
834 #if defined(USE_PETSC_BOPT_g)
835       if (k> max) max = k;
836 #endif
837     }
838   }
839   /* Loop Over B */
840   for ( i=0; i<b->mbs; i++ ) {
841     for ( j=bi[i]; j<bi[i+1]; j++ ) {
842       row = i+rstart;
843       col = garray[bj[j]];
844       key = row*Nbs + col + 1;
845       h1  = HASH(size,key,tmp);
846       for ( k=0; k<size; k++ ){
847         if (HT[(h1+k)%size] == 0) {
848           HT[(h1+k)%size] = key;
849           HD[(h1+k)%size] = b->a + j*bs2;
850           break;
851 #if defined(USE_PETSC_BOPT_g)
852         } else {
853           ct++;
854 #endif
855         }
856       }
857 #if defined(USE_PETSC_BOPT_g)
858       if (k> max) max = k;
859 #endif
860     }
861   }
862 
863   /* Print Summary */
864 #if defined(USE_PETSC_BOPT_g)
865   for ( i=0,j=0; i<size; i++)
866     if (HT[i]) {j++;}
867   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
868            (j== 0)? 0.0:((double)(ct+j))/j,max);
869 #endif
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNC__
874 #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
875 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
876 {
877   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
878   MPI_Status  *send_status,recv_status;
879   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
880   int         bs=baij->bs,row,col,other_disassembled;
881   Scalar      *values,val;
882   InsertMode  addv = mat->insertmode;
883 
884   PetscFunctionBegin;
885   /*  wait on receives */
886   while (count) {
887     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
888     /* unpack receives into our local space */
889     values = baij->rvalues + 3*imdex*baij->rmax;
890     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
891     n = n/3;
892     for ( i=0; i<n; i++ ) {
893       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
894       col = (int) PetscReal(values[3*i+1]);
895       val = values[3*i+2];
896       if (col >= baij->cstart*bs && col < baij->cend*bs) {
897         col -= baij->cstart*bs;
898         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
899       } else {
900         if (mat->was_assembled) {
901           if (!baij->colmap) {
902             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
903           }
904           col = (baij->colmap[col/bs]) - 1 + col%bs;
905           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
906             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
907             col = (int) PetscReal(values[3*i+1]);
908           }
909         }
910         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
911       }
912     }
913     count--;
914   }
915   if (baij->recv_waits) PetscFree(baij->recv_waits);
916   if (baij->rvalues)    PetscFree(baij->rvalues);
917 
918   /* wait on sends */
919   if (baij->nsends) {
920     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
921     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
922     PetscFree(send_status);
923   }
924   if (baij->send_waits) PetscFree(baij->send_waits);
925   if (baij->svalues)    PetscFree(baij->svalues);
926 
927   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
928   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
929 
930   /* determine if any processor has disassembled, if so we must
931      also disassemble ourselfs, in order that we may reassemble. */
932   /*
933      if nonzero structure of submatrix B cannot change then we know that
934      no processor disassembled thus we can skip this stuff
935   */
936   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
937     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
938     if (mat->was_assembled && !other_disassembled) {
939       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
940     }
941   }
942 
943   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
944     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
945   }
946   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
947   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
948 
949 #if defined(USE_PETSC_BOPT_g)
950   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
951     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
952              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
953     baij->ht_total_ct  = 0;
954     baij->ht_insert_ct = 0;
955   }
956 #endif
957   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
958     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
959     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
960     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
961   }
962 
963   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
964   PetscFunctionReturn(0);
965 }
966 
967 #undef __FUNC__
968 #define __FUNC__ "MatView_MPIBAIJ_Binary"
969 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
970 {
971   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
972   int          ierr;
973 
974   PetscFunctionBegin;
975   if (baij->size == 1) {
976     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
977   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNC__
982 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
983 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
984 {
985   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
986   int          ierr, format,rank,bs = baij->bs;
987   FILE         *fd;
988   ViewerType   vtype;
989 
990   PetscFunctionBegin;
991   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
992   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
993     ierr = ViewerGetFormat(viewer,&format);
994     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
995       MatInfo info;
996       MPI_Comm_rank(mat->comm,&rank);
997       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
998       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
999       PetscSequentialPhaseBegin(mat->comm,1);
1000       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1001               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1002               baij->bs,(int)info.memory);
1003       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
1004       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1005       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
1006       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1007       fflush(fd);
1008       PetscSequentialPhaseEnd(mat->comm,1);
1009       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
1010       PetscFunctionReturn(0);
1011     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1012       PetscPrintf(mat->comm,"  block size is %d\n",bs);
1013       PetscFunctionReturn(0);
1014     }
1015   }
1016 
1017   if (vtype == DRAW_VIEWER) {
1018     Draw       draw;
1019     PetscTruth isnull;
1020     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
1021     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1022   }
1023 
1024   if (vtype == ASCII_FILE_VIEWER) {
1025     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1026     PetscSequentialPhaseBegin(mat->comm,1);
1027     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
1028            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
1029             baij->cstart*bs,baij->cend*bs);
1030     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1031     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
1032     fflush(fd);
1033     PetscSequentialPhaseEnd(mat->comm,1);
1034   } else {
1035     int size = baij->size;
1036     rank = baij->rank;
1037     if (size == 1) {
1038       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1039     } else {
1040       /* assemble the entire matrix onto first processor. */
1041       Mat         A;
1042       Mat_SeqBAIJ *Aloc;
1043       int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
1044       int         mbs=baij->mbs;
1045       Scalar      *a;
1046 
1047       if (!rank) {
1048         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1049       } else {
1050         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1051       }
1052       PLogObjectParent(mat,A);
1053 
1054       /* copy over the A part */
1055       Aloc = (Mat_SeqBAIJ*) baij->A->data;
1056       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1057       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1058 
1059       for ( i=0; i<mbs; i++ ) {
1060         rvals[0] = bs*(baij->rstart + i);
1061         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1062         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1063           col = (baij->cstart+aj[j])*bs;
1064           for (k=0; k<bs; k++ ) {
1065             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1066             col++; a += bs;
1067           }
1068         }
1069       }
1070       /* copy over the B part */
1071       Aloc = (Mat_SeqBAIJ*) baij->B->data;
1072       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1073       for ( i=0; i<mbs; i++ ) {
1074         rvals[0] = bs*(baij->rstart + i);
1075         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1076         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1077           col = baij->garray[aj[j]]*bs;
1078           for (k=0; k<bs; k++ ) {
1079             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1080             col++; a += bs;
1081           }
1082         }
1083       }
1084       PetscFree(rvals);
1085       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1086       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1087       /*
1088          Everyone has to call to draw the matrix since the graphics waits are
1089          synchronized across all processors that share the Draw object
1090       */
1091       if (!rank || vtype == DRAW_VIEWER) {
1092         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1093       }
1094       ierr = MatDestroy(A); CHKERRQ(ierr);
1095     }
1096   }
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 
1101 
1102 #undef __FUNC__
1103 #define __FUNC__ "MatView_MPIBAIJ"
1104 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1105 {
1106   int         ierr;
1107   ViewerType  vtype;
1108 
1109   PetscFunctionBegin;
1110   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1111   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1112       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1113     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1114   } else if (vtype == BINARY_FILE_VIEWER) {
1115     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1116   } else {
1117     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1118   }
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNC__
1123 #define __FUNC__ "MatDestroy_MPIBAIJ"
1124 int MatDestroy_MPIBAIJ(Mat mat)
1125 {
1126   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1127   int         ierr;
1128 
1129   PetscFunctionBegin;
1130   if (--mat->refct > 0) PetscFunctionReturn(0);
1131 
1132   if (mat->mapping) {
1133     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
1134   }
1135   if (mat->bmapping) {
1136     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
1137   }
1138 #if defined(USE_PETSC_LOG)
1139   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
1140 #endif
1141 
1142   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1143   PetscFree(baij->rowners);
1144   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1145   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1146   if (baij->colmap) PetscFree(baij->colmap);
1147   if (baij->garray) PetscFree(baij->garray);
1148   if (baij->lvec)   VecDestroy(baij->lvec);
1149   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
1150   if (baij->rowvalues) PetscFree(baij->rowvalues);
1151   if (baij->barray) PetscFree(baij->barray);
1152   if (baij->hd) PetscFree(baij->hd);
1153   PetscFree(baij);
1154   PLogObjectDestroy(mat);
1155   PetscHeaderDestroy(mat);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNC__
1160 #define __FUNC__ "MatMult_MPIBAIJ"
1161 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1162 {
1163   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1164   int         ierr, nt;
1165 
1166   PetscFunctionBegin;
1167   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1168   if (nt != a->n) {
1169     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1170   }
1171   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1172   if (nt != a->m) {
1173     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and yy");
1174   }
1175   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1176   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1177   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1178   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1179   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNC__
1184 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1185 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1186 {
1187   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1188   int        ierr;
1189 
1190   PetscFunctionBegin;
1191   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1192   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1193   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1194   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNC__
1199 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1200 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1201 {
1202   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1203   int         ierr;
1204 
1205   PetscFunctionBegin;
1206   /* do nondiagonal part */
1207   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1208   /* send it on its way */
1209   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1210   /* do local part */
1211   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1212   /* receive remote parts: note this assumes the values are not actually */
1213   /* inserted in yy until the next line, which is true for this implementation */
1214   /* but may not always be true. */
1215   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNC__
1220 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1221 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1222 {
1223   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1224   int         ierr;
1225 
1226   PetscFunctionBegin;
1227   /* do nondiagonal part */
1228   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1229   /* send it on its way */
1230   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1231   /* do local part */
1232   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1233   /* receive remote parts: note this assumes the values are not actually */
1234   /* inserted in zz until the next line, which is true for this implementation */
1235   /* but may not always be true. */
1236   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 /*
1241   This only works correctly for square matrices where the subblock A->A is the
1242    diagonal block
1243 */
1244 #undef __FUNC__
1245 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1246 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1247 {
1248   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1249   int         ierr;
1250 
1251   PetscFunctionBegin;
1252   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1253   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNC__
1258 #define __FUNC__ "MatScale_MPIBAIJ"
1259 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1260 {
1261   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1262   int         ierr;
1263 
1264   PetscFunctionBegin;
1265   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1266   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNC__
1271 #define __FUNC__ "MatGetSize_MPIBAIJ"
1272 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1273 {
1274   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1275 
1276   PetscFunctionBegin;
1277   if (m) *m = mat->M;
1278   if (n) *n = mat->N;
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNC__
1283 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1284 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1285 {
1286   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1287 
1288   PetscFunctionBegin;
1289   *m = mat->m; *n = mat->n;
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 #undef __FUNC__
1294 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1295 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1296 {
1297   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1298 
1299   PetscFunctionBegin;
1300   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1305 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1306 
1307 #undef __FUNC__
1308 #define __FUNC__ "MatGetRow_MPIBAIJ"
1309 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1310 {
1311   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1312   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1313   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1314   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1315   int        *cmap, *idx_p,cstart = mat->cstart;
1316 
1317   PetscFunctionBegin;
1318   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1319   mat->getrowactive = PETSC_TRUE;
1320 
1321   if (!mat->rowvalues && (idx || v)) {
1322     /*
1323         allocate enough space to hold information from the longest row.
1324     */
1325     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1326     int     max = 1,mbs = mat->mbs,tmp;
1327     for ( i=0; i<mbs; i++ ) {
1328       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1329       if (max < tmp) { max = tmp; }
1330     }
1331     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1332     CHKPTRQ(mat->rowvalues);
1333     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1334   }
1335 
1336   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows");
1337   lrow = row - brstart;
1338 
1339   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1340   if (!v)   {pvA = 0; pvB = 0;}
1341   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1342   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1343   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1344   nztot = nzA + nzB;
1345 
1346   cmap  = mat->garray;
1347   if (v  || idx) {
1348     if (nztot) {
1349       /* Sort by increasing column numbers, assuming A and B already sorted */
1350       int imark = -1;
1351       if (v) {
1352         *v = v_p = mat->rowvalues;
1353         for ( i=0; i<nzB; i++ ) {
1354           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1355           else break;
1356         }
1357         imark = i;
1358         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1359         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1360       }
1361       if (idx) {
1362         *idx = idx_p = mat->rowindices;
1363         if (imark > -1) {
1364           for ( i=0; i<imark; i++ ) {
1365             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1366           }
1367         } else {
1368           for ( i=0; i<nzB; i++ ) {
1369             if (cmap[cworkB[i]/bs] < cstart)
1370               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1371             else break;
1372           }
1373           imark = i;
1374         }
1375         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1376         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1377       }
1378     } else {
1379       if (idx) *idx = 0;
1380       if (v)   *v   = 0;
1381     }
1382   }
1383   *nz = nztot;
1384   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1385   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNC__
1390 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1391 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1392 {
1393   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1394 
1395   PetscFunctionBegin;
1396   if (baij->getrowactive == PETSC_FALSE) {
1397     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1398   }
1399   baij->getrowactive = PETSC_FALSE;
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNC__
1404 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1405 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1406 {
1407   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1408 
1409   PetscFunctionBegin;
1410   *bs = baij->bs;
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNC__
1415 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1416 int MatZeroEntries_MPIBAIJ(Mat A)
1417 {
1418   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1419   int         ierr;
1420 
1421   PetscFunctionBegin;
1422   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1423   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNC__
1428 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1429 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1430 {
1431   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1432   Mat         A = a->A, B = a->B;
1433   int         ierr;
1434   double      isend[5], irecv[5];
1435 
1436   PetscFunctionBegin;
1437   info->block_size     = (double)a->bs;
1438   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1439   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; isend[3] = info->memory; isend[4] = info->mallocs;
1440   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1441   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; isend[3] += info->memory; isend[4] += info->mallocs;
1442   if (flag == MAT_LOCAL) {
1443     info->nz_used      = isend[0];
1444     info->nz_allocated = isend[1];
1445     info->nz_unneeded  = isend[2];
1446     info->memory       = isend[3];
1447     info->mallocs      = isend[4];
1448   } else if (flag == MAT_GLOBAL_MAX) {
1449     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1450     info->nz_used      = irecv[0];
1451     info->nz_allocated = irecv[1];
1452     info->nz_unneeded  = irecv[2];
1453     info->memory       = irecv[3];
1454     info->mallocs      = irecv[4];
1455   } else if (flag == MAT_GLOBAL_SUM) {
1456     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1457     info->nz_used      = irecv[0];
1458     info->nz_allocated = irecv[1];
1459     info->nz_unneeded  = irecv[2];
1460     info->memory       = irecv[3];
1461     info->mallocs      = irecv[4];
1462   }
1463   info->rows_global       = (double)a->M;
1464   info->columns_global    = (double)a->N;
1465   info->rows_local        = (double)a->m;
1466   info->columns_local     = (double)a->n;
1467   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1468   info->fill_ratio_needed = 0;
1469   info->factor_mallocs    = 0;
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNC__
1474 #define __FUNC__ "MatSetOption_MPIBAIJ"
1475 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1476 {
1477   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1478 
1479   PetscFunctionBegin;
1480   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1481       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1482       op == MAT_COLUMNS_UNSORTED ||
1483       op == MAT_COLUMNS_SORTED ||
1484       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1485       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1486         MatSetOption(a->A,op);
1487         MatSetOption(a->B,op);
1488   } else if (op == MAT_ROW_ORIENTED) {
1489         a->roworiented = 1;
1490         MatSetOption(a->A,op);
1491         MatSetOption(a->B,op);
1492   } else if (op == MAT_ROWS_SORTED ||
1493              op == MAT_ROWS_UNSORTED ||
1494              op == MAT_SYMMETRIC ||
1495              op == MAT_STRUCTURALLY_SYMMETRIC ||
1496              op == MAT_YES_NEW_DIAGONALS)
1497     PLogInfo(A,"MatSetOption_MPIBAIJ:Option ignored\n");
1499   else if (op == MAT_COLUMN_ORIENTED) {
1500     a->roworiented = 0;
1501     MatSetOption(a->A,op);
1502     MatSetOption(a->B,op);
1503   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1504     a->donotstash = 1;
1505   } else if (op == MAT_NO_NEW_DIAGONALS) {
1506     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1507   } else if (op == MAT_USE_HASH_TABLE) {
1508     a->ht_flag = 1;
1509   } else {
1510     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1511   }
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #undef __FUNC__
1516 #define __FUNC__ "MatTranspose_MPIBAIJ"
1517 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1518 {
1519   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1520   Mat_SeqBAIJ *Aloc;
1521   Mat        B;
1522   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1523   int        bs=baij->bs,mbs=baij->mbs;
1524   Scalar     *a;
1525 
1526   PetscFunctionBegin;
1527   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1528   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1529   CHKERRQ(ierr);
1530 
1531   /* copy over the A part */
1532   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1533   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1534   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
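  /* rvals[] holds the bs global point row numbers of the current block row of A;
     in MatSetValues(B,1,&col,bs,rvals,...) below they become column indices of the transpose */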
1535 
1536   for ( i=0; i<mbs; i++ ) {
1537     rvals[0] = bs*(baij->rstart + i);
1538     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1539     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1540       col = (baij->cstart+aj[j])*bs;
1541       for (k=0; k<bs; k++ ) {
1542         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1543         col++; a += bs;
1544       }
1545     }
1546   }
1547   /* copy over the B part */
1548   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1549   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1550   for ( i=0; i<mbs; i++ ) {
1551     rvals[0] = bs*(baij->rstart + i);
1552     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1553     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1554       col = baij->garray[aj[j]]*bs;
1555       for (k=0; k<bs; k++ ) {
1556         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1557         col++; a += bs;
1558       }
1559     }
1560   }
1561   PetscFree(rvals);
1562   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1563   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1564 
1565   if (matout != PETSC_NULL) {
1566     *matout = B;
1567   } else {
1568     PetscOps       *Abops;
1569     struct _MatOps *Aops;
1570 
1571     /* This isn't really an in-place transpose, but we free the old data structures from baij */
1572     PetscFree(baij->rowners);
1573     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1574     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1575     if (baij->colmap) PetscFree(baij->colmap);
1576     if (baij->garray) PetscFree(baij->garray);
1577     if (baij->lvec) VecDestroy(baij->lvec);
1578     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1579     PetscFree(baij);
1580 
1581     /*
1582        This is horrible, horrible code. We need to keep the
1583       A pointers for the bops and ops but copy everything
1584       else from B.
1585     */
1586     Abops = A->bops;
1587     Aops  = A->ops;
1588     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1589     A->bops = Abops;
1590     A->ops  = Aops;
1591 
1592     PetscHeaderDestroy(B);
1593   }
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNC__
1598 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1599 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1600 {
1601   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1602   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1603   int ierr,s1,s2,s3;
1604 
1605   PetscFunctionBegin;
1606   if (ll)  {
1607     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1608     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1609     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1610     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1611     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1612   }
1613   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNC__
1618 #define __FUNC__ "MatZeroRows_MPIBAIJ"
1619 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1620 {
1621   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
1622   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1623   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1624   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1625   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1626   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1627   MPI_Comm       comm = A->comm;
1628   MPI_Request    *send_waits,*recv_waits;
1629   MPI_Status     recv_status,*send_status;
1630   IS             istmp;
1631   PetscTruth     localdiag;
1632 
1633   PetscFunctionBegin;
1634   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
1635   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
1636 
1637   /*  first count number of contributors to each processor */
1638   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
1639   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
1640   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
1641   for ( i=0; i<N; i++ ) {
1642     idx = rows[i];
1643     found = 0;
1644     for ( j=0; j<size; j++ ) {
1645       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1646         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1647       }
1648     }
1649     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1650   }
1651   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
1652 
1653   /* inform other processors of number of messages and max length*/
1654   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1655   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1656   nrecvs = work[rank];
1657   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
1658   nmax   = work[rank];
1659   PetscFree(work);
1660 
1661   /* post receives:   */
1662   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1663   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1664   for ( i=0; i<nrecvs; i++ ) {
1665     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1666   }
1667 
1668   /* do sends:
1669      1) starts[i] gives the starting index in svalues for stuff going to
1670      the ith processor
1671   */
1672   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1673   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1674   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
1675   starts[0] = 0;
1676   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1677   for ( i=0; i<N; i++ ) {
1678     svalues[starts[owner[i]]++] = rows[i];
1679   }
1680   ISRestoreIndices(is,&rows);
1681 
1682   starts[0] = 0;
1683   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
1684   count = 0;
1685   for ( i=0; i<size; i++ ) {
1686     if (procs[i]) {
1687       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1688     }
1689   }
1690   PetscFree(starts);
1691 
1692   base = owners[rank]*bs;
1693 
1694   /*  wait on receives */
1695   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
1696   source = lens + nrecvs;
1697   count  = nrecvs; slen = 0;
1698   while (count) {
1699     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1700     /* unpack receives into our local space */
1701     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1702     source[imdex]  = recv_status.MPI_SOURCE;
1703     lens[imdex]  = n;
1704     slen += n;
1705     count--;
1706   }
1707   PetscFree(recv_waits);
1708 
1709   /* move the data into the send scatter */
1710   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
1711   count = 0;
1712   for ( i=0; i<nrecvs; i++ ) {
1713     values = rvalues + i*nmax;
1714     for ( j=0; j<lens[i]; j++ ) {
1715       lrows[count++] = values[j] - base;
1716     }
1717   }
1718   PetscFree(rvalues); PetscFree(lens);
1719   PetscFree(owner); PetscFree(nprocs);
1720 
1721   /* actually zap the local rows */
1722   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1723   PLogObjectParent(A,istmp);
1724 
1725   /*
1726         Zero the required rows. If the "diagonal block" of the matrix
1727      is square and the user wishes to set the diagonal we use separate
1728      code so that MatSetValues() is not called for each diagonal entry,
1729      which would allocate new memory and slow things down with many mallocs.
1730 
1731        Contributed by: Mathew Knepley
1732   */
1733   localdiag = PETSC_FALSE;
1734   if (diag && (l->A->M == l->A->N)) {
1735     localdiag = PETSC_TRUE;
1736     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
1737   } else {
1738     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1739   }
1740   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
1741   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1742 
1743   if (diag && (localdiag == PETSC_FALSE)) {
1744     for ( i = 0; i < slen; i++ ) {
1745       row = lrows[i] + rstart_bs;
1746       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1747     }
1748     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1749     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1750   }
1751   PetscFree(lrows);
1752 
1753   /* wait on sends */
1754   if (nsends) {
1755     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1756     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1757     PetscFree(send_status);
1758   }
1759   PetscFree(send_waits); PetscFree(svalues);
1760 
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 extern int MatPrintHelp_SeqBAIJ(Mat);
1765 #undef __FUNC__
1766 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1767 int MatPrintHelp_MPIBAIJ(Mat A)
1768 {
1769   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1770   MPI_Comm    comm = A->comm;
1771   static int  called = 0;
1772   int         ierr;
1773 
1774   PetscFunctionBegin;
1775   if (!a->rank) {
1776     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1777   }
1778   if (called) {PetscFunctionReturn(0);} else called = 1;
1779   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1780   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNC__
1785 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1786 int MatSetUnfactored_MPIBAIJ(Mat A)
1787 {
1788   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1789   int         ierr;
1790 
1791   PetscFunctionBegin;
1792   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1793   PetscFunctionReturn(0);
1794 }
1795 
1796 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
1797 
1798 /* -------------------------------------------------------------------*/
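/*
    The table below fills struct _MatOps positionally; a 0 entry means the
  corresponding operation is not supported by the MPIBAIJ format.
*/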
1799 static struct _MatOps MatOps = {
1800   MatSetValues_MPIBAIJ,MatGetRow_MPIBAIJ,MatRestoreRow_MPIBAIJ,MatMult_MPIBAIJ,
1801   MatMultAdd_MPIBAIJ,MatMultTrans_MPIBAIJ,MatMultTransAdd_MPIBAIJ,0,
1802   0,0,0,0,
1803   0,0,MatTranspose_MPIBAIJ,MatGetInfo_MPIBAIJ,
1804   0,MatGetDiagonal_MPIBAIJ,MatDiagonalScale_MPIBAIJ,MatNorm_MPIBAIJ,
1805   MatAssemblyBegin_MPIBAIJ,MatAssemblyEnd_MPIBAIJ,0,MatSetOption_MPIBAIJ,
1806   MatZeroEntries_MPIBAIJ,MatZeroRows_MPIBAIJ,0,
1807   0,0,0,MatGetSize_MPIBAIJ,
1808   MatGetLocalSize_MPIBAIJ,MatGetOwnershipRange_MPIBAIJ,0,0,
1809   0,0,MatConvertSameType_MPIBAIJ,0,0,
1810   0,0,0,MatGetSubMatrices_MPIBAIJ,
1811   MatIncreaseOverlap_MPIBAIJ,MatGetValues_MPIBAIJ,0,MatPrintHelp_MPIBAIJ,
1812   MatScale_MPIBAIJ,0,0,0,MatGetBlockSize_MPIBAIJ,
1813   0,0,0,0,0,0,MatSetUnfactored_MPIBAIJ,0,MatSetValuesBlocked_MPIBAIJ};
1814 
1815 
1816 #undef __FUNC__
1817 #define __FUNC__ "MatCreateMPIBAIJ"
1818 /*@C
1819    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1820    (block compressed row).  For good matrix assembly performance
1821    the user should preallocate the matrix storage by setting the parameters
1822    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1823    performance can be increased by more than a factor of 50.
1824 
1825    Collective on MPI_Comm
1826 
1827    Input Parameters:
1828 +  comm - MPI communicator
1829 .  bs   - size of block
1830 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1831            This value should be the same as the local size used in creating the
1832            y vector for the matrix-vector product y = Ax.
1833 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1834            This value should be the same as the local size used in creating the
1835            x vector for the matrix-vector product y = Ax.
1836 .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1837 .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1838 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1839            submatrix  (same for all local rows)
1840 .  d_nnz - array containing the number of block nonzeros in the various block rows
1841            of the diagonal portion of the local submatrix (possibly different for each block
1842            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1843 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1844            submatrix (same for all local rows).
1845 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1846            off-diagonal portion of the local submatrix (possibly different for
1847            each block row) or PETSC_NULL.
1848 
1849    Output Parameter:
1850 .  A - the matrix
1851 
1852    Options Database Keys:
1853 +   -mat_no_unroll - uses code that does not unroll the loops in the
1854                      block calculations (much slower)
1855 -   -mat_block_size - size of the blocks to use
1856 
1857    Notes:
1858    The user MUST specify either the local or global matrix dimensions
1859    (possibly both).
1860 
1861    Storage Information:
1862    For a square global matrix we define each processor's diagonal portion
1863    to be its local rows and the corresponding columns (a square submatrix);
1864    each processor's off-diagonal portion encompasses the remainder of the
1865    local matrix (a rectangular submatrix).
1866 
1867    The user can specify preallocated storage for the diagonal part of
1868    the local submatrix with either d_nz or d_nnz (not both).  Set
1869    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1870    memory allocation.  Likewise, specify preallocated storage for the
1871    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1872 
1873    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1874    the figure below we depict these three local rows and all columns (0-11).
1875 
1876 .vb
1877            0 1 2 3 4 5 6 7 8 9 10 11
1878           -------------------
1879    row 3  |  o o o d d d o o o o o o
1880    row 4  |  o o o d d d o o o o o o
1881    row 5  |  o o o d d d o o o o o o
1882           -------------------
1883 .ve
1884 
1885    Thus, any entries in the d locations are stored in the d (diagonal)
1886    submatrix, and any entries in the o locations are stored in the
1887    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1888    stored simply in the MATSEQBAIJ format for compressed row storage.
1889 
1890    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1891    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1892    In general, for PDE problems in which most nonzeros are near the diagonal,
1893    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1894    or you will get TERRIBLE performance; see the users' manual chapter on
1895    matrices.
1896 
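   Example Usage:
   A minimal sketch (the sizes and preallocation counts below are purely
   illustrative) of creating and assembling a matrix with 3x3 blocks is

.vb
      Mat A;
      int ierr, bs = 3, m = 5*bs;      /* five block rows on this process */
      ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DECIDE,PETSC_DECIDE,
                              3,PETSC_NULL,1,PETSC_NULL,&A); CHKERRQ(ierr);
      /* ... insert entries with MatSetValuesBlocked() or MatSetValues() ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
.ve
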
1897 .keywords: matrix, block, aij, compressed row, sparse, parallel
1898 
1899 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
1900 @*/
1901 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
1902                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1903 {
1904   Mat          B;
1905   Mat_MPIBAIJ  *b;
1906   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1907 
1908   PetscFunctionBegin;
1909   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
1910 
1911   MPI_Comm_size(comm,&size);
1912   if (size == 1) {
1913     if (M == PETSC_DECIDE) M = m;
1914     if (N == PETSC_DECIDE) N = n;
1915     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
1916     PetscFunctionReturn(0);
1917   }
1918 
1919   *A = 0;
1920   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
1921   PLogObjectCreate(B);
1922   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
1923   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1924   PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
1925 
1926   B->ops->destroy    = MatDestroy_MPIBAIJ;
1927   B->ops->view       = MatView_MPIBAIJ;
1928   B->mapping    = 0;
1929   B->factor     = 0;
1930   B->assembled  = PETSC_FALSE;
1931 
1932   B->insertmode = NOT_SET_VALUES;
1933   MPI_Comm_rank(comm,&b->rank);
1934   MPI_Comm_size(comm,&b->size);
1935 
1936   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1937     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1938   }
1939   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1940     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1941   }
1942   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1943     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1944   }
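  /* if both the local and global sizes were specified, the local sizes take
     precedence and the global sizes are recomputed from them below */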
1945   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1946   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
1947 
1948   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
1949     work[0] = m; work[1] = n;
1950     mbs = m/bs; nbs = n/bs;
1951     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
1952     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
1953     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
1954   }
1955   if (m == PETSC_DECIDE) {
1956     Mbs = M/bs;
1957     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
1958     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
1959     m   = mbs*bs;
1960   }
1961   if (n == PETSC_DECIDE) {
1962     Nbs = N/bs;
1963     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
1964     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
1965     n   = nbs*bs;
1966   }
1967   if (mbs*bs != m || nbs*bs != n) {
1968     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1969   }
1970 
1971   b->m = m; B->m = m;
1972   b->n = n; B->n = n;
1973   b->N = N; B->N = N;
1974   b->M = M; B->M = M;
1975   b->bs  = bs;
1976   b->bs2 = bs*bs;
1977   b->mbs = mbs;
1978   b->nbs = nbs;
1979   b->Mbs = Mbs;
1980   b->Nbs = Nbs;
1981 
1982   /* build local table of row and column ownerships */
1983   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
1984   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1985   b->cowners = b->rowners + b->size + 2;
1986   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1987   b->rowners[0] = 0;
1988   for ( i=2; i<=b->size; i++ ) {
1989     b->rowners[i] += b->rowners[i-1];
1990   }
1991   b->rstart    = b->rowners[b->rank];
1992   b->rend      = b->rowners[b->rank+1];
1993   b->rstart_bs = b->rstart * bs;
1994   b->rend_bs   = b->rend * bs;
1995 
1996   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1997   b->cowners[0] = 0;
1998   for ( i=2; i<=b->size; i++ ) {
1999     b->cowners[i] += b->cowners[i-1];
2000   }
2001   b->cstart    = b->cowners[b->rank];
2002   b->cend      = b->cowners[b->rank+1];
2003   b->cstart_bs = b->cstart * bs;
2004   b->cend_bs   = b->cend * bs;
2005 
2006 
2007   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2008   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
2009   PLogObjectParent(B,b->A);
2010   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2011   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
2012   PLogObjectParent(B,b->B);
2013 
2014   /* build cache for off array entries formed */
2015   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
2016   b->donotstash  = 0;
2017   b->colmap      = 0;
2018   b->garray      = 0;
2019   b->roworiented = 1;
2020 
2021   /* stuff used in block assembly */
2022   b->barray       = 0;
2023 
2024   /* stuff used for matrix vector multiply */
2025   b->lvec         = 0;
2026   b->Mvctx        = 0;
2027 
2028   /* stuff for MatGetRow() */
2029   b->rowindices   = 0;
2030   b->rowvalues    = 0;
2031   b->getrowactive = PETSC_FALSE;
2032 
2033   /* hash table stuff */
2034   b->ht           = 0;
2035   b->hd           = 0;
2036   b->ht_size      = 0;
2037   b->ht_flag      = 0;
2038   b->ht_fact      = 0;
2039   b->ht_total_ct  = 0;
2040   b->ht_insert_ct = 0;
2041 
2042   *A = B;
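  /* optionally enable the hash table assisted assembly path; the option value is
     used as the factor for sizing the hash table (see MatMPIBAIJSetHashTableFactor()) */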
2043   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2044   if (flg) {
2045     double fact = 1.39;
2046     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2047     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2048     if (fact <= 1.0) fact = 1.39;
2049     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2050     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2051   }
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNC__
2056 #define __FUNC__ "MatConvertSameType_MPIBAIJ"
2057 static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
2058 {
2059   Mat         mat;
2060   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
2061   int         ierr, len=0, flg;
2062 
2063   PetscFunctionBegin;
2064   *newmat       = 0;
2065   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
2066   PLogObjectCreate(mat);
2067   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2068   PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
2069   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2070   mat->ops->view       = MatView_MPIBAIJ;
2071   mat->factor     = matin->factor;
2072   mat->assembled  = PETSC_TRUE;
2073 
2074   a->m = mat->m   = oldmat->m;
2075   a->n = mat->n   = oldmat->n;
2076   a->M = mat->M   = oldmat->M;
2077   a->N = mat->N   = oldmat->N;
2078 
2079   a->bs  = oldmat->bs;
2080   a->bs2 = oldmat->bs2;
2081   a->mbs = oldmat->mbs;
2082   a->nbs = oldmat->nbs;
2083   a->Mbs = oldmat->Mbs;
2084   a->Nbs = oldmat->Nbs;
2085 
2086   a->rstart       = oldmat->rstart;
2087   a->rend         = oldmat->rend;
2088   a->cstart       = oldmat->cstart;
2089   a->cend         = oldmat->cend;
2090   a->size         = oldmat->size;
2091   a->rank         = oldmat->rank;
2092   mat->insertmode = NOT_SET_VALUES;
2093   a->rowvalues    = 0;
2094   a->getrowactive = PETSC_FALSE;
2095   a->barray       = 0;
2096 
2097   /* hash table stuff */
2098   a->ht           = 0;
2099   a->hd           = 0;
2100   a->ht_size      = 0;
2101   a->ht_flag      = oldmat->ht_flag;
2102   a->ht_fact      = oldmat->ht_fact;
2103   a->ht_total_ct  = 0;
2104   a->ht_insert_ct = 0;
2105 
2106 
2107   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2108   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2109   a->cowners = a->rowners + a->size + 2;
2110   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2111   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
2112   if (oldmat->colmap) {
2113     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2114     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2115     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2116   } else a->colmap = 0;
2117   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
2118     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
2119     PLogObjectMemory(mat,len*sizeof(int));
2120     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2121   } else a->garray = 0;
2122 
2123   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
2124   PLogObjectParent(mat,a->lvec);
2125   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2126   PLogObjectParent(mat,a->Mvctx);
2127   ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
2128   PLogObjectParent(mat,a->A);
2129   ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
2130   PLogObjectParent(mat,a->B);
2131   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2132   if (flg) {
2133     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
2134   }
2135   *newmat = mat;
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 #include "sys.h"
2140 
2141 #undef __FUNC__
2142 #define __FUNC__ "MatLoad_MPIBAIJ"
2143 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2144 {
2145   Mat          A;
2146   int          i, nz, ierr, j,rstart, rend, fd;
2147   Scalar       *vals,*buf;
2148   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2149   MPI_Status   status;
2150   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2151   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
2152   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2153   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2154   int          dcount,kmax,k,nzcount,tmp;
2155 
2156   PetscFunctionBegin;
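  /*
     Process 0 reads the file header and the row lengths (distributed with
     MPI_Scatterv()), then reads the column indices and numerical values in
     per-process chunks and ships each chunk to its owner with MPI_Send();
     the other processes post matching MPI_Recv()s and insert their rows
     with MatSetValues().
  */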
2157   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
2158 
2159   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
2160   if (!rank) {
2161     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2162     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2163     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2164     if (header[3] < 0) {
2165       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2166     }
2167   }
2168 
2169   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2170   M = header[1]; N = header[2];
2171 
2172   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2173 
2174   /*
2175      This code adds extra rows to make sure the number of rows is
2176      divisible by the blocksize
2177   */
2178   Mbs        = M/bs;
2179   extra_rows = bs - M + bs*(Mbs);
2180   if (extra_rows == bs) extra_rows = 0;
2181   else                  Mbs++;
2182   if (extra_rows &&!rank) {
2183     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2184   }
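  /* e.g. M=10 with bs=3 gives Mbs=4 and extra_rows=2; each padded row receives a
     single diagonal entry of 1.0 when the column indices and values are read below */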
2185 
2186   /* determine ownership of all rows */
2187   mbs = Mbs/size + ((Mbs % size) > rank);
2188   m   = mbs * bs;
2189   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2190   browners = rowners + size + 1;
2191   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2192   rowners[0] = 0;
2193   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2194   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
2195   rstart = rowners[rank];
2196   rend   = rowners[rank+1];
2197 
2198   /* distribute row lengths to all processors */
2199   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
2200   if (!rank) {
2201     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2202     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
2203     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
2204     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2205     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2206     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2207     PetscFree(sndcounts);
2208   } else {
2209     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
2210   }
2211 
2212   if (!rank) {
2213     /* calculate the number of nonzeros on each processor */
2214     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
2215     PetscMemzero(procsnz,size*sizeof(int));
2216     for ( i=0; i<size; i++ ) {
2217       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
2218         procsnz[i] += rowlengths[j];
2219       }
2220     }
2221     PetscFree(rowlengths);
2222 
2223     /* determine max buffer needed and allocate it */
2224     maxnz = 0;
2225     for ( i=0; i<size; i++ ) {
2226       maxnz = PetscMax(maxnz,procsnz[i]);
2227     }
2228     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
2229 
2230     /* read in my part of the matrix column indices  */
2231     nz = procsnz[0];
2232     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2233     mycols = ibuf;
2234     if (size == 1)  nz -= extra_rows;
2235     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2236     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2237 
2238     /* read in the column indices for each of the other processors (except the last) and ship them off */
2239     for ( i=1; i<size-1; i++ ) {
2240       nz   = procsnz[i];
2241       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2242       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2243     }
2244     /* read in the stuff for the last proc */
2245     if ( size != 1 ) {
2246       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2247       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2248       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2249       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2250     }
2251     PetscFree(cols);
2252   } else {
2253     /* determine buffer space needed for message */
2254     nz = 0;
2255     for ( i=0; i<m; i++ ) {
2256       nz += locrowlens[i];
2257     }
2258     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
2259     mycols = ibuf;
2260     /* receive message of column indices*/
2261     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2262     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2263     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2264   }
2265 
2266   /* loop over the local block rows, counting the diagonal and off-diagonal block columns (dlens/odlens) used for preallocation */
2267   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2268   odlens = dlens + (rend-rstart);
2269   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2270   PetscMemzero(mask,3*Mbs*sizeof(int));
2271   masked1 = mask    + Mbs;
2272   masked2 = masked1 + Mbs;
2273   rowcount = 0; nzcount = 0;
2274   for ( i=0; i<mbs; i++ ) {
2275     dcount  = 0;
2276     odcount = 0;
2277     for ( j=0; j<bs; j++ ) {
2278       kmax = locrowlens[rowcount];
2279       for ( k=0; k<kmax; k++ ) {
2280         tmp = mycols[nzcount++]/bs;
2281         if (!mask[tmp]) {
2282           mask[tmp] = 1;
2283           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
2284           else masked1[dcount++] = tmp;
2285         }
2286       }
2287       rowcount++;
2288     }
2289 
2290     dlens[i]  = dcount;
2291     odlens[i] = odcount;
2292 
2293     /* zero out the mask elements we set */
2294     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
2295     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
2296   }
2297 
2298   /* create our matrix */
2299   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2300          CHKERRQ(ierr);
2301   A = *newmat;
2302   MatSetOption(A,MAT_COLUMNS_SORTED);
2303 
2304   if (!rank) {
2305     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
2306     /* read in my part of the matrix numerical values  */
2307     nz = procsnz[0];
2308     vals = buf;
2309     mycols = ibuf;
2310     if (size == 1)  nz -= extra_rows;
2311     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2312     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2313 
2314     /* insert into matrix */
2315     jj      = rstart*bs;
2316     for ( i=0; i<m; i++ ) {
2317       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2318       mycols += locrowlens[i];
2319       vals   += locrowlens[i];
2320       jj++;
2321     }
2322     /* read in other processors (except the last one) and ship out */
2323     for ( i=1; i<size-1; i++ ) {
2324       nz   = procsnz[i];
2325       vals = buf;
2326       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2327       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2328     }
2329     /* the last proc */
2330     if ( size != 1 ){
2331       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2332       vals = buf;
2333       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2334       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2335       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2336     }
2337     PetscFree(procsnz);
2338   } else {
2339     /* receive numeric values */
2340     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
2341 
2342     /* receive message of values*/
2343     vals   = buf;
2344     mycols = ibuf;
2345     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2346     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2347     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2348 
2349     /* insert into matrix */
2350     jj      = rstart*bs;
2351     for ( i=0; i<m; i++ ) {
2352       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2353       mycols += locrowlens[i];
2354       vals   += locrowlens[i];
2355       jj++;
2356     }
2357   }
2358   PetscFree(locrowlens);
2359   PetscFree(buf);
2360   PetscFree(ibuf);
2361   PetscFree(rowners);
2362   PetscFree(dlens);
2363   PetscFree(mask);
2364   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2365   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 
2370 
2371 #undef __FUNC__
2372 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2373 /*@
2374    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2375 
2376    Collective on Mat
2377 
2378    Input Parameters:
2379 +  mat  - the matrix
2380 -  fact - factor used in computing the size of the hash table
2381 
2382   Notes:
2383    This can also be set by the command line option: -mat_use_hash_table fact
2384 
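   Example Usage:
   A typical (illustrative) call sequence is

.vb
      ierr = MatSetOption(mat,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(mat,1.6); CHKERRQ(ierr);
.ve
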
2385 .keywords: matrix, hashtable, factor, HT
2386 
2387 .seealso: MatSetOption()
2388 @*/
2389 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2390 {
2391   Mat_MPIBAIJ *baij;
2392 
2393   PetscFunctionBegin;
2394   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2395   if (mat->type != MATMPIBAIJ) {
2396       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2397   }
2398   baij = (Mat_MPIBAIJ*) mat->data;
2399   baij->ht_fact = fact;
2400   PetscFunctionReturn(0);
2401 }
2402