xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e26ad7d8ee88123bbdfe3b070a3e2d22b606b0ee)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mpibaij.c,v 1.135 1998/08/25 19:52:14 bsmith Exp bsmith $";
3 #endif
4 
5 #include "pinclude/pviewer.h"         /*I "mat.h" I*/
6 #include "src/mat/impls/baij/mpi/mpibaij.h"
7 #include "src/vec/vecimpl.h"
8 
9 
10 extern int MatSetUpMultiply_MPIBAIJ(Mat);
11 extern int DisAssemble_MPIBAIJ(Mat);
12 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
13 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
14 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
15 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
22 #undef __FUNC__
23 #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
24 static int CreateColmap_MPIBAIJ_Private(Mat mat)
25 {
26   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
28   int         nbs = B->nbs,i,bs=B->bs;;
29 
30   PetscFunctionBegin;
31   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
32   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
33   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
34   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
35   PetscFunctionReturn(0);
36 }
37 
#define CHUNKSIZE  10  /* number of extra block entries added to a row when it must grow */

/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) -

   Inserts (INSERT_VALUES) or adds (ADD_VALUES) one scalar at point
   indices (row,col) of the local diagonal part A.  Expanded inline in
   MatSetValues_MPIBAIJ(), which must have in scope: a, ai, aj, aimax,
   ailen, aa, A, bs, bs2, and the scratch variables rp, ap, bap, rmax,
   nrow, low, high, t, _i, ii, N, brow, bcol, ridx, cidx.  If the target
   block is absent and a->nonew permits it, the block row is enlarged by
   CHUNKSIZE block entries (reallocating the whole matrix storage) and
   later entries are shifted up to keep block columns sorted.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search while the range is large, then linear scan */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          /* block already present: update the (ridx,cidx) entry in place */ \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space (single allocation: values, j, i) */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
        for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
        /* zero the CHUNKSIZE gap opened inside row brow */ \
        PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(a->a);  \
        if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = 1; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
114 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) -

   Identical in structure to MatSetValues_SeqBAIJ_A_Private above, but
   operates on the off-diagonal part B (using b, bi, bj, bimax, bilen,
   ba, B and the b_noinsert label).  See the A variant for a description
   of the search/insert/enlarge algorithm.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search while the range is large, then linear scan */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for ( _i=low; _i<high; _i++ ) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        Scalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space (single allocation: values, j, i) */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
        new_j   = (int *) (new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
        for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
        len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
                                                           len*sizeof(int)); \
        PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
        /* zero the CHUNKSIZE gap opened inside row brow */ \
        PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
        PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
        /* free up old matrix storage */ \
        PetscFree(b->a);  \
        if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = 1; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for ( ii=N; ii>=_i; ii-- ) { \
        rp[ii+1] = rp[ii]; \
        PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
      } \
      if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
189 
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ - Inserts or adds scalar values (point indices)
   into the parallel block AIJ matrix.

   Each (im[i],in[j]) entry is routed to one of three places:
     - diagonal part A     (row and column both in this processor's range),
     - off-diagonal part B (row local, column not),
     - the stash           (row owned by another processor; communicated
                            later during assembly).
   Insertion into A/B goes through the MatSetValues_SeqBAIJ_*_Private
   macros above, which consume the scratch variables declared below.
*/
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  Scalar      value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  Scalar      *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  Scalar      *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  Scalar      *ap,*bap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
            /* colmap holds i*bs+1 for present block columns, 0 otherwise,
               so this yields the local (scaled) column, or < 0 if absent */
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-process nonzero location: fall back to the
                 unassembled form so B can accept the insertion */
              ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *) (B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned elsewhere: stash for communication in MatAssemblyBegin */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
271 
272 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
273 #undef __FUNC__
274 #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
275 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
276 {
277   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
278   Scalar      *value,*barray=baij->barray;
279   int         ierr,i,j,ii,jj,row,col,k,l;
280   int         roworiented = baij->roworiented,rstart=baij->rstart ;
281   int         rend=baij->rend,cstart=baij->cstart,stepval;
282   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
283 
284   if(!barray) {
285     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
286   }
287 
288   if (roworiented) {
289     stepval = (n-1)*bs;
290   } else {
291     stepval = (m-1)*bs;
292   }
293   for ( i=0; i<m; i++ ) {
294 #if defined(USE_PETSC_BOPT_g)
295     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
296     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
297 #endif
298     if (im[i] >= rstart && im[i] < rend) {
299       row = im[i] - rstart;
300       for ( j=0; j<n; j++ ) {
301         /* If NumCol = 1 then a copy is not required */
302         if ((roworiented) && (n == 1)) {
303           barray = v + i*bs2;
304         } else if((!roworiented) && (m == 1)) {
305           barray = v + j*bs2;
306         } else { /* Here a copy is required */
307           if (roworiented) {
308             value = v + i*(stepval+bs)*bs + j*bs;
309           } else {
310             value = v + j*(stepval+bs)*bs + i*bs;
311           }
312           for ( ii=0; ii<bs; ii++,value+=stepval ) {
313             for (jj=0; jj<bs; jj++ ) {
314               *barray++  = *value++;
315             }
316           }
317           barray -=bs2;
318         }
319 
320         if (in[j] >= cstart && in[j] < cend){
321           col  = in[j] - cstart;
322           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
323         }
324 #if defined(USE_PETSC_BOPT_g)
325         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
326         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
327 #endif
328         else {
329           if (mat->was_assembled) {
330             if (!baij->colmap) {
331               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
332             }
333 
334 #if defined(USE_PETSC_BOPT_g)
335             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
336 #endif
337             col = (baij->colmap[in[j]] - 1)/bs;
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (!baij->donotstash) {
349         if (roworiented ) {
350           row   = im[i]*bs;
351           value = v + i*(stepval+bs)*bs;
352           for ( j=0; j<bs; j++,row++ ) {
353             for ( k=0; k<n; k++ ) {
354               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
355                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
356               }
357             }
358           }
359         } else {
360           for ( j=0; j<n; j++ ) {
361             value = v + j*(stepval+bs)*bs + i*bs;
362             col   = in[j]*bs;
363             for ( k=0; k<bs; k++,col++,value+=stepval) {
364               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
365                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
366               }
367             }
368           }
369         }
370       }
371     }
372   }
373   PetscFunctionReturn(0);
374 }
#include <math.h>
/* Multiplicative hashing for the hash-table MatSetValues variants below:
   take the fractional part of key*HASH_KEY (HASH_KEY = fractional golden
   ratio) and scale it to the table size.  tmp must be a double lvalue. */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIBAIJ_HT"
/*
   MatSetValues_MPIBAIJ_HT - Hash-table variant of MatSetValues_MPIBAIJ.

   Instead of searching the row structure of A/B, each locally owned
   entry is located through the table built by
   MatCreateHashTable_MPIBAIJ_Private: HT[idx] holds the key
   (brow*Nbs+bcol+1) and HD[idx] the address of the corresponding bs*bs
   value block.  New nonzero locations cannot be created here; a missing
   key is an error.  Off-processor rows go to the stash as usual.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  double      tmp;
  Scalar      ** HD = baij->hd,value;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for ( j=0; j<n; j++ ) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build additionally counts probes for table statistics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probing: scan forward to the table end, then wrap */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probing: scan forward to the table end, then wrap */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned elsewhere: stash for communication during assembly */
      if (roworiented && !baij->donotstash) {
        ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!baij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
459 
#undef __FUNC__
#define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
/*
   MatSetValuesBlocked_MPIBAIJ_HT - Hash-table variant of
   MatSetValuesBlocked_MPIBAIJ; im[]/in[] are BLOCK indices.

   Each locally owned (block row, block col) pair is looked up in the
   table (key = row*Nbs+col+1) and the bs*bs block of values is copied or
   added directly at HD[idx].  New nonzero blocks cannot be created here;
   a missing key is an error.  Unowned block rows are stashed
   entry-by-entry.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr,i,j,ii,jj,row,col,k,l;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  double      tmp;
  Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
#if defined(USE_PETSC_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for ( j=0; j<n; j++ ) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(USE_PETSC_BOPT_g)
        /* debug build additionally counts probes for table statistics */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probing: scan forward to the table end, then wrap */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probing: scan forward to the table end, then wrap */
          for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval) {
              for ( jj=ii; jj<bs2; jj+=bs ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
              for ( jj=0; jj<bs; jj++ ) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned elsewhere: stash each scalar entry individually */
      if (!baij->donotstash) {
        if (roworiented ) {
          row   = im[i]*bs;
          value = v + i*(stepval+bs)*bs;
          for ( j=0; j<bs; j++,row++ ) {
            for ( k=0; k<n; k++ ) {
              for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        } else {
          for ( j=0; j<n; j++ ) {
            value = v + j*(stepval+bs)*bs + i*bs;
            col   = in[j]*bs;
            for ( k=0; k<bs; k++,col++,value+=stepval) {
              for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
                ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
              }
            }
          }
        }
      }
    }
  }
#if defined(USE_PETSC_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
589 
#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves v[i*n+j] = mat(idxm[i],idxn[j])
   using point (not block) indices.

   Only rows owned by this processor are supported.  Columns in the
   locally owned diagonal range are read from A; all other columns are
   translated through colmap/garray into the off-diagonal part B,
   returning 0.0 when the block column is not stored locally.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* colmap[blockcol] is 0 when absent (so -1 after the shift);
             the garray check guards against a stale/ambiguous mapping */
          if((baij->colmap[idxn[j]/bs]-1 < 0) ||
             (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            /* local scaled column = (block start) + offset within block */
            col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
628 
629 #undef __FUNC__
630 #define __FUNC__ "MatNorm_MPIBAIJ"
631 int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
632 {
633   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
634   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
635   int        ierr, i,bs2=baij->bs2;
636   double     sum = 0.0;
637   Scalar     *v;
638 
639   PetscFunctionBegin;
640   if (baij->size == 1) {
641     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
642   } else {
643     if (type == NORM_FROBENIUS) {
644       v = amat->a;
645       for (i=0; i<amat->nz*bs2; i++ ) {
646 #if defined(USE_PETSC_COMPLEX)
647         sum += PetscReal(PetscConj(*v)*(*v)); v++;
648 #else
649         sum += (*v)*(*v); v++;
650 #endif
651       }
652       v = bmat->a;
653       for (i=0; i<bmat->nz*bs2; i++ ) {
654 #if defined(USE_PETSC_COMPLEX)
655         sum += PetscReal(PetscConj(*v)*(*v)); v++;
656 #else
657         sum += (*v)*(*v); v++;
658 #endif
659       }
660       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
661       *norm = sqrt(*norm);
662     } else {
663       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
664     }
665   }
666   PetscFunctionReturn(0);
667 }
668 
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
/*
   MatAssemblyBegin_MPIBAIJ - Starts communication of stashed
   off-processor values.

   Steps: verify all processors agree on INSERT vs ADD mode; count
   stashed contributions per destination processor; agree on message
   counts and maximum message length with MPI_Allreduce; post
   nonblocking receives; pack (row index, col index, value) triples
   (indices stored as Scalars) and post nonblocking sends.  The pending
   requests and buffers are recorded in baij for MatAssemblyEnd to
   complete.
*/
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
  int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  if (baij->donotstash) {
    /* nothing stashed: record an empty communication state and return */
    baij->svalues    = 0; baij->rvalues    = 0;
    baij->nsends     = 0; baij->nrecvs     = 0;
    baij->send_waits = 0; baij->recv_waits = 0;
    baij->rmax       = 0;
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<baij->stash.n; i++ ) {
    idx = baij->stash.idx[i];
    /* owners[] holds block-row boundaries, so scale by bs for point rows */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax      = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered pairs
     (global index,value) we store the global index as a double
     to simplify the message passing.
       2) since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.


       This could be done better.
  */
  rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack (row,col,value) triples; starts[] is advanced as slots fill */
  for ( i=0; i<baij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
  }
  PetscFree(owner);
  /* rebuild starts[], consumed by the packing loop above */
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);

  /* record pending communication for MatAssemblyEnd */
  baij->svalues    = svalues;    baij->rvalues    = rvalues;
  baij->nsends     = nsends;     baij->nrecvs     = nreceives;
  baij->send_waits = send_waits; baij->recv_waits = recv_waits;
  baij->rmax       = nmax;

  PetscFunctionReturn(0);
}
773 
/*
  Creates and populates the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
781 #undef __FUNC__
782 #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
783 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
784 {
785   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
786   Mat         A = baij->A, B=baij->B;
787   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
788   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
789   int         size,bs2=baij->bs2,rstart=baij->rstart;
790   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
791   int         *HT,key;
792   Scalar      **HD;
793   double      tmp;
794 #if defined(USE_PETSC_BOPT_g)
795   int         ct=0,max=0;
796 #endif
797 
798   PetscFunctionBegin;
799   baij->ht_size=(int)(factor*nz);
800   size = baij->ht_size;
801 
802   if (baij->ht) {
803     PetscFunctionReturn(0);
804   }
805 
806   /* Allocate Memory for Hash Table */
807   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
808   baij->ht = (int*)(baij->hd + size);
809   HD = baij->hd;
810   HT = baij->ht;
811 
812 
813   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
814 
815 
816   /* Loop Over A */
817   for ( i=0; i<a->mbs; i++ ) {
818     for ( j=ai[i]; j<ai[i+1]; j++ ) {
819       row = i+rstart;
820       col = aj[j]+cstart;
821 
822       key = row*Nbs + col + 1;
823       h1  = HASH(size,key,tmp);
824       for ( k=0; k<size; k++ ){
825         if (HT[(h1+k)%size] == 0.0) {
826           HT[(h1+k)%size] = key;
827           HD[(h1+k)%size] = a->a + j*bs2;
828           break;
829 #if defined(USE_PETSC_BOPT_g)
830         } else {
831           ct++;
832 #endif
833         }
834       }
835 #if defined(USE_PETSC_BOPT_g)
836       if (k> max) max = k;
837 #endif
838     }
839   }
840   /* Loop Over B */
841   for ( i=0; i<b->mbs; i++ ) {
842     for ( j=bi[i]; j<bi[i+1]; j++ ) {
843       row = i+rstart;
844       col = garray[bj[j]];
845       key = row*Nbs + col + 1;
846       h1  = HASH(size,key,tmp);
847       for ( k=0; k<size; k++ ){
848         if (HT[(h1+k)%size] == 0.0) {
849           HT[(h1+k)%size] = key;
850           HD[(h1+k)%size] = b->a + j*bs2;
851           break;
852 #if defined(USE_PETSC_BOPT_g)
853         } else {
854           ct++;
855 #endif
856         }
857       }
858 #if defined(USE_PETSC_BOPT_g)
859       if (k> max) max = k;
860 #endif
861     }
862   }
863 
864   /* Print Summary */
865 #if defined(USE_PETSC_BOPT_g)
866   for ( i=0,j=0; i<size; i++)
867     if (HT[i]) {j++;}
868   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
869            (j== 0)? 0.0:((double)(ct+j))/j,max);
870 #endif
871   PetscFunctionReturn(0);
872 }
873 
#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes the stash-based assembly started in MatAssemblyBegin: drains
   the posted receives of off-processor (row,col,value) triples, inserts
   them locally, releases the communication buffers, and assembles the
   local A (diagonal) and B (off-diagonal) blocks.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
  int         bs=baij->bs,row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /*  wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    /* each message is a sequence of (row,col,value) triples packed as
       Scalars, rmax triples per message slot */
    values = baij->rvalues + 3*imdex*baij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;  /* number of triples actually received */
    for ( i=0; i<n; i++ ) {
      /* row/col arrive as global point indices encoded in Scalars */
      row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= baij->cstart*bs && col < baij->cend*bs) {
        /* column lies in our diagonal block: shift to local numbering */
        col -= baij->cstart*bs;
        ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      } else {
        if (mat->was_assembled) {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
          }
          /* colmap stores (local block index)*bs + 1, so subtract 1;
             a result < 0 means the block column is not present in B */
          col = (baij->colmap[col/bs]) - 1 + col%bs;
          if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
            /* new off-diagonal block column: disassemble so B can grow,
               then fall back to the global column index */
            ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
      }
    }
    count--;
  }
  if (baij->recv_waits) PetscFree(baij->recv_waits);
  if (baij->rvalues)    PetscFree(baij->rvalues);

  /* wait on sends */
  if (baij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  if (baij->send_waits) PetscFree(baij->send_waits);
  if (baij->svalues)    PetscFree(baij->svalues);

  ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled flags: 0 if any rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);

#if defined(USE_PETSC_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
             ((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on the first final assembly, optionally build the hash table and
     switch MatSetValues to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* row-extraction scratch from a previous MatGetRow is now stale */
  if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
  PetscFunctionReturn(0);
}
967 
968 #undef __FUNC__
969 #define __FUNC__ "MatView_MPIBAIJ_Binary"
970 static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
971 {
972   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
973   int          ierr;
974 
975   PetscFunctionBegin;
976   if (baij->size == 1) {
977     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
978   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNC__
983 #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
984 static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
985 {
986   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
987   int          ierr, format,rank,bs = baij->bs;
988   FILE         *fd;
989   ViewerType   vtype;
990 
991   PetscFunctionBegin;
992   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
993   if (vtype  == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
994     ierr = ViewerGetFormat(viewer,&format);
995     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
996       MatInfo info;
997       MPI_Comm_rank(mat->comm,&rank);
998       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
999       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
1000       PetscSequentialPhaseBegin(mat->comm,1);
1001       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1002               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1003               baij->bs,(int)info.memory);
1004       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
1005       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1006       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
1007       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1008       fflush(fd);
1009       PetscSequentialPhaseEnd(mat->comm,1);
1010       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
1011       PetscFunctionReturn(0);
1012     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1013       PetscPrintf(mat->comm,"  block size is %d\n",bs);
1014       PetscFunctionReturn(0);
1015     }
1016   }
1017 
1018   if (vtype == DRAW_VIEWER) {
1019     Draw       draw;
1020     PetscTruth isnull;
1021     ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
1022     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1023   }
1024 
1025   if (vtype == ASCII_FILE_VIEWER) {
1026     ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1027     PetscSequentialPhaseBegin(mat->comm,1);
1028     fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
1029            baij->rank,baij->m,baij->rstart*bs,baij->rend*bs,baij->n,
1030             baij->cstart*bs,baij->cend*bs);
1031     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1032     ierr = MatView(baij->B,viewer); CHKERRQ(ierr);
1033     fflush(fd);
1034     PetscSequentialPhaseEnd(mat->comm,1);
1035   } else {
1036     int size = baij->size;
1037     rank = baij->rank;
1038     if (size == 1) {
1039       ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1040     } else {
1041       /* assemble the entire matrix onto first processor. */
1042       Mat         A;
1043       Mat_SeqBAIJ *Aloc;
1044       int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
1045       int         mbs=baij->mbs;
1046       Scalar      *a;
1047 
1048       if (!rank) {
1049         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1050       } else {
1051         ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1052       }
1053       PLogObjectParent(mat,A);
1054 
1055       /* copy over the A part */
1056       Aloc = (Mat_SeqBAIJ*) baij->A->data;
1057       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1058       rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1059 
1060       for ( i=0; i<mbs; i++ ) {
1061         rvals[0] = bs*(baij->rstart + i);
1062         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1063         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1064           col = (baij->cstart+aj[j])*bs;
1065           for (k=0; k<bs; k++ ) {
1066             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1067             col++; a += bs;
1068           }
1069         }
1070       }
1071       /* copy over the B part */
1072       Aloc = (Mat_SeqBAIJ*) baij->B->data;
1073       ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1074       for ( i=0; i<mbs; i++ ) {
1075         rvals[0] = bs*(baij->rstart + i);
1076         for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1077         for ( j=ai[i]; j<ai[i+1]; j++ ) {
1078           col = baij->garray[aj[j]]*bs;
1079           for (k=0; k<bs; k++ ) {
1080             ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1081             col++; a += bs;
1082           }
1083         }
1084       }
1085       PetscFree(rvals);
1086       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1087       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1088       /*
1089          Everyone has to call to draw the matrix since the graphics waits are
1090          synchronized across all processors that share the Draw object
1091       */
1092       if (!rank || vtype == DRAW_VIEWER) {
1093         ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
1094       }
1095       ierr = MatDestroy(A); CHKERRQ(ierr);
1096     }
1097   }
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 
1102 
1103 #undef __FUNC__
1104 #define __FUNC__ "MatView_MPIBAIJ"
1105 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1106 {
1107   int         ierr;
1108   ViewerType  vtype;
1109 
1110   PetscFunctionBegin;
1111   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
1112   if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
1113       vtype == DRAW_VIEWER       || vtype == MATLAB_VIEWER) {
1114     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
1115   } else if (vtype == BINARY_FILE_VIEWER) {
1116     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1117   } else {
1118     SETERRQ(1,1,"Viewer type not supported by PETSc object");
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIBAIJ"
/*
   Destroys the parallel matrix: releases mappings, the sequential
   A/B blocks, communication structures, and scratch buffers, then
   destroys the Mat header itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

  PetscFunctionBegin;
  /* reference counted: only the last release actually frees */
  if (--mat->refct > 0) PetscFunctionReturn(0);

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  if (baij->colmap) PetscFree(baij->colmap);
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  if (baij->barray) PetscFree(baij->barray);
  /* hd and ht share one allocation; freeing hd releases both */
  if (baij->hd) PetscFree(baij->hd);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1165 
1166 #undef __FUNC__
1167 #define __FUNC__ "MatMult_MPIBAIJ"
1168 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1169 {
1170   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1171   int         ierr, nt;
1172 
1173   PetscFunctionBegin;
1174   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1175   if (nt != a->n) {
1176     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1177   }
1178   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1179   if (nt != a->m) {
1180     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1181   }
1182   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1183   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
1184   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1185   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
1186   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNC__
1191 #define __FUNC__ "MatMultAdd_MPIBAIJ"
1192 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1193 {
1194   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1195   int        ierr;
1196 
1197   PetscFunctionBegin;
1198   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1199   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1200   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1201   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNC__
1206 #define __FUNC__ "MatMultTrans_MPIBAIJ"
1207 int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1208 {
1209   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1210   int         ierr;
1211 
1212   PetscFunctionBegin;
1213   /* do nondiagonal part */
1214   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1215   /* send it on its way */
1216   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1217   /* do local part */
1218   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1219   /* receive remote parts: note this assumes the values are not actually */
1220   /* inserted in yy until the next line, which is true for my implementation*/
1221   /* but is not perhaps always true. */
1222   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNC__
1227 #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1228 int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1229 {
1230   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1231   int         ierr;
1232 
1233   PetscFunctionBegin;
1234   /* do nondiagonal part */
1235   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1236   /* send it on its way */
1237   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1238   /* do local part */
1239   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1240   /* receive remote parts: note this assumes the values are not actually */
1241   /* inserted in yy until the next line, which is true for my implementation*/
1242   /* but is not perhaps always true. */
1243   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 /*
1248   This only works correctly for square matrices where the subblock A->A is the
1249    diagonal block
1250 */
1251 #undef __FUNC__
1252 #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1253 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1254 {
1255   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1256   int         ierr;
1257 
1258   PetscFunctionBegin;
1259   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1260   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNC__
1265 #define __FUNC__ "MatScale_MPIBAIJ"
1266 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1267 {
1268   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1269   int         ierr;
1270 
1271   PetscFunctionBegin;
1272   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1273   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNC__
1278 #define __FUNC__ "MatGetSize_MPIBAIJ"
1279 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1280 {
1281   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1282 
1283   PetscFunctionBegin;
1284   if (m) *m = mat->M;
1285   if (n) *n = mat->N;
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNC__
1290 #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1291 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1292 {
1293   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1294 
1295   PetscFunctionBegin;
1296   *m = mat->m; *n = mat->n;
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNC__
1301 #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1302 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1303 {
1304   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1305 
1306   PetscFunctionBegin;
1307   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1312 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1313 
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIBAIJ"
/*
   Returns one locally-owned global row as (column, value) pairs sorted by
   increasing global column.  The row is merged from the diagonal block A
   and off-diagonal block B; results point into scratch buffers cached on
   the matrix, so MatRestoreRow must be called before the next MatGetRow.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
  int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
  int        *cmap, *idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be extracted at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for ( i=0; i<mbs; i++ ) {
      /* combined block count of row i across both A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation: values first, then the index array right after */
    mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request columns and/or values from A and B only as needed */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark marks how many leading B entries have global block column
         below cstart (they precede all of A's columns); the rest of B
         follows A */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: find the split point now */
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        /* A's columns are local; offset them by the block-column start */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1395 
1396 #undef __FUNC__
1397 #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1398 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1399 {
1400   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1401 
1402   PetscFunctionBegin;
1403   if (baij->getrowactive == PETSC_FALSE) {
1404     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1405   }
1406   baij->getrowactive = PETSC_FALSE;
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNC__
1411 #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1412 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1413 {
1414   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1415 
1416   PetscFunctionBegin;
1417   *bs = baij->bs;
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNC__
1422 #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1423 int MatZeroEntries_MPIBAIJ(Mat A)
1424 {
1425   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
1426   int         ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
1430   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNC__
1435 #define __FUNC__ "MatGetInfo_MPIBAIJ"
1436 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1437 {
1438   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
1439   Mat         A = a->A, B = a->B;
1440   int         ierr;
1441   double      isend[5], irecv[5];
1442 
1443   PetscFunctionBegin;
1444   info->block_size     = (double)a->bs;
1445   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
1446   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1447   isend[3] = info->memory;  isend[4] = info->mallocs;
1448   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
1449   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1450   isend[3] += info->memory;  isend[4] += info->mallocs;
1451   if (flag == MAT_LOCAL) {
1452     info->nz_used      = isend[0];
1453     info->nz_allocated = isend[1];
1454     info->nz_unneeded  = isend[2];
1455     info->memory       = isend[3];
1456     info->mallocs      = isend[4];
1457   } else if (flag == MAT_GLOBAL_MAX) {
1458     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1459     info->nz_used      = irecv[0];
1460     info->nz_allocated = irecv[1];
1461     info->nz_unneeded  = irecv[2];
1462     info->memory       = irecv[3];
1463     info->mallocs      = irecv[4];
1464   } else if (flag == MAT_GLOBAL_SUM) {
1465     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1466     info->nz_used      = irecv[0];
1467     info->nz_allocated = irecv[1];
1468     info->nz_unneeded  = irecv[2];
1469     info->memory       = irecv[3];
1470     info->mallocs      = irecv[4];
1471   }
1472   info->rows_global       = (double)a->M;
1473   info->columns_global    = (double)a->N;
1474   info->rows_local        = (double)a->m;
1475   info->columns_local     = (double)a->N;
1476   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1477   info->fill_ratio_needed = 0;
1478   info->factor_mallocs    = 0;
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNC__
1483 #define __FUNC__ "MatSetOption_MPIBAIJ"
1484 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1485 {
1486   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1487 
1488   PetscFunctionBegin;
1489   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1490       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1491       op == MAT_COLUMNS_UNSORTED ||
1492       op == MAT_COLUMNS_SORTED ||
1493       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1494       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1495         MatSetOption(a->A,op);
1496         MatSetOption(a->B,op);
1497   } else if (op == MAT_ROW_ORIENTED) {
1498         a->roworiented = 1;
1499         MatSetOption(a->A,op);
1500         MatSetOption(a->B,op);
1501   } else if (op == MAT_ROWS_SORTED ||
1502              op == MAT_ROWS_UNSORTED ||
1503              op == MAT_SYMMETRIC ||
1504              op == MAT_STRUCTURALLY_SYMMETRIC ||
1505              op == MAT_YES_NEW_DIAGONALS ||
1506              op == MAT_USE_HASH_TABLE)
1507     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1508   else if (op == MAT_COLUMN_ORIENTED) {
1509     a->roworiented = 0;
1510     MatSetOption(a->A,op);
1511     MatSetOption(a->B,op);
1512   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1513     a->donotstash = 1;
1514   } else if (op == MAT_NO_NEW_DIAGONALS) {
1515     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1516   } else if (op == MAT_USE_HASH_TABLE) {
1517     a->ht_flag = 1;
1518   } else {
1519     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNC__
1525 #define __FUNC__ "MatTranspose_MPIBAIJ("
1526 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1527 {
1528   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
1529   Mat_SeqBAIJ *Aloc;
1530   Mat        B;
1531   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1532   int        bs=baij->bs,mbs=baij->mbs;
1533   Scalar     *a;
1534 
1535   PetscFunctionBegin;
1536   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1537   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1538   CHKERRQ(ierr);
1539 
1540   /* copy over the A part */
1541   Aloc = (Mat_SeqBAIJ*) baij->A->data;
1542   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1543   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
1544 
1545   for ( i=0; i<mbs; i++ ) {
1546     rvals[0] = bs*(baij->rstart + i);
1547     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1548     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1549       col = (baij->cstart+aj[j])*bs;
1550       for (k=0; k<bs; k++ ) {
1551         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1552         col++; a += bs;
1553       }
1554     }
1555   }
1556   /* copy over the B part */
1557   Aloc = (Mat_SeqBAIJ*) baij->B->data;
1558   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1559   for ( i=0; i<mbs; i++ ) {
1560     rvals[0] = bs*(baij->rstart + i);
1561     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
1562     for ( j=ai[i]; j<ai[i+1]; j++ ) {
1563       col = baij->garray[aj[j]]*bs;
1564       for (k=0; k<bs; k++ ) {
1565         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1566         col++; a += bs;
1567       }
1568     }
1569   }
1570   PetscFree(rvals);
1571   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1572   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1573 
1574   if (matout != PETSC_NULL) {
1575     *matout = B;
1576   } else {
1577     PetscOps *Abops;
1578     MatOps   Aops;
1579 
1580     /* This isn't really an in-place transpose .... but free data structures from baij */
1581     PetscFree(baij->rowners);
1582     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
1583     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1584     if (baij->colmap) PetscFree(baij->colmap);
1585     if (baij->garray) PetscFree(baij->garray);
1586     if (baij->lvec) VecDestroy(baij->lvec);
1587     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
1588     PetscFree(baij);
1589 
1590     /*
1591        This is horrible, horrible code. We need to keep the
1592       A pointers for the bops and ops but copy everything
1593       else from C.
1594     */
1595     Abops = A->bops;
1596     Aops  = A->ops;
1597     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1598     A->bops = Abops;
1599     A->ops  = Aops;
1600 
1601     PetscHeaderDestroy(B);
1602   }
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #undef __FUNC__
1607 #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
1608 int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
1609 {
1610   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
1611   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
1612   int ierr,s1,s2,s3;
1613 
1614   PetscFunctionBegin;
1615   if (ll)  {
1616     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
1617     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1618     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
1619     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
1620     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
1621   }
1622   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
1623   PetscFunctionReturn(0);
1624 }
1625 
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the global rows of A listed in "is"; if diag
   is nonzero, the diagonal entry of each zeroed row is then set to *diag.

   The index set may name rows owned by any process, so the indices are
   first routed to their owning processes with point-to-point messages;
   each process then zeros its own rows in both the diagonal block (l->A)
   and the off-diagonal block (l->B).

   Note: the row indices are point (unblocked) indices, hence the block
   ownership table l->rowners is scaled by bs when testing ownership.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    /* linear search over the block ownership ranges (scaled to points) */
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack the indices destined for each process contiguously; this pass
     destroys starts[], which is therefore rebuilt below */
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank]*bs;  /* first (unblocked) row owned by this process */

  /*  wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]  = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;  /* convert to local row numbering */
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  /* fallback: when the diagonal block could not absorb the diagonal
     entries above, insert them one at a time and reassemble */
  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
1772 
1773 extern int MatPrintHelp_SeqBAIJ(Mat);
1774 #undef __FUNC__
1775 #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1776 int MatPrintHelp_MPIBAIJ(Mat A)
1777 {
1778   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1779   MPI_Comm    comm = A->comm;
1780   static int  called = 0;
1781   int         ierr;
1782 
1783   PetscFunctionBegin;
1784   if (!a->rank) {
1785     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1786   }
1787   if (called) {PetscFunctionReturn(0);} else called = 1;
1788   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1789   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 #undef __FUNC__
1794 #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1795 int MatSetUnfactored_MPIBAIJ(Mat A)
1796 {
1797   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1798   int         ierr;
1799 
1800   PetscFunctionBegin;
1801   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 static int MatConvertSameType_MPIBAIJ(Mat,Mat *,int);
1806 
1807 /* -------------------------------------------------------------------*/
1808 static struct _MatOps MatOps_Values = {
1809   MatSetValues_MPIBAIJ,
1810   MatGetRow_MPIBAIJ,
1811   MatRestoreRow_MPIBAIJ,
1812   MatMult_MPIBAIJ,
1813   MatMultAdd_MPIBAIJ,
1814   MatMultTrans_MPIBAIJ,
1815   MatMultTransAdd_MPIBAIJ,
1816   0,
1817   0,
1818   0,
1819   0,
1820   0,
1821   0,
1822   0,
1823   MatTranspose_MPIBAIJ,
1824   MatGetInfo_MPIBAIJ,
1825   0,
1826   MatGetDiagonal_MPIBAIJ,
1827   MatDiagonalScale_MPIBAIJ,
1828   MatNorm_MPIBAIJ,
1829   MatAssemblyBegin_MPIBAIJ,
1830   MatAssemblyEnd_MPIBAIJ,
1831   0,
1832   MatSetOption_MPIBAIJ,
1833   MatZeroEntries_MPIBAIJ,
1834   MatZeroRows_MPIBAIJ,
1835   0,
1836   0,
1837   0,
1838   0,
1839   MatGetSize_MPIBAIJ,
1840   MatGetLocalSize_MPIBAIJ,
1841   MatGetOwnershipRange_MPIBAIJ,
1842   0,
1843   0,
1844   0,
1845   0,
1846   MatConvertSameType_MPIBAIJ,
1847   0,
1848   0,
1849   0,
1850   0,
1851   0,
1852   MatGetSubMatrices_MPIBAIJ,
1853   MatIncreaseOverlap_MPIBAIJ,
1854   MatGetValues_MPIBAIJ,
1855   0,
1856   MatPrintHelp_MPIBAIJ,
1857   MatScale_MPIBAIJ,
1858   0,
1859   0,
1860   0,
1861   MatGetBlockSize_MPIBAIJ,
1862   0,
1863   0,
1864   0,
1865   0,
1866   0,
1867   0,
1868   MatSetUnfactored_MPIBAIJ,
1869   0,
1870   MatSetValuesBlocked_MPIBAIJ,
1871   0,
1872   0,
1873   0,
1874   MatGetMaps_Petsc};
1875 
1876 
#undef __FUNC__
#define __FUNC__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
  int          flag1 = 0,flag2 = 0;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");

  MPI_Comm_size(comm,&size);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
  /* on one process (and unless the parallel format was forced via the
     options database) fall back to the sequential BAIJ format */
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  /* sanity checks on the size arguments: per-row preallocation requires a
     known local row count, and at least one of each local/global pair
     must be specified */
  if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both local and global sizes are given, the local sizes win: the
     global size is recomputed below from the sum of the local sizes */
  if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    /* global size = sum of the local sizes over the communicator */
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    /* split the global block rows as evenly as possible; the first
       (Mbs % size) processes get one extra block row */
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  /* record both the point sizes (m,n,M,N) and block sizes (mbs,...) */
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather every process's block-row count, then prefix-sum so that
     rowners[p]..rowners[p+1] is the block-row range owned by process p */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  /* same construction for the column ownership */
  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the local diagonal (m x n) and off-diagonal (m x N) blocks,
     both stored in sequential BAIJ format */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = 0;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* reject degenerate factors; fall back to default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  PetscFunctionReturn(0);
}
2128 
#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIBAIJ"
/*
   MatConvertSameType_MPIBAIJ - Creates a new MATMPIBAIJ matrix that is a
   copy of matin: all size/ownership metadata is duplicated field by field
   and the local blocks A and B are copied with MatConvert(...,MATSAME,...).

   NOTE(review): the cpvalues argument is never read in this body; the
   submatrix values are always copied -- confirm against callers whether
   a structure-only duplicate was intended for DO_NOT_COPY_VALUES.
*/
static int MatConvertSameType_MPIBAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
  int         ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
  mat->ops->destroy    = MatDestroy_MPIBAIJ;
  mat->ops->view       = MatView_MPIBAIJ;
  mat->factor     = matin->factor;
  mat->assembled  = PETSC_TRUE;

  /* duplicate the point and block dimensions */
  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;

  /* hash table stuff: the configuration is inherited but the table
     itself (and its counters) start empty */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;


  /* row and column ownership tables live in one allocation; copy both */
  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  /* colmap/garray exist only after the off-diagonal block is assembled;
     copy them when present */
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
    a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  /* duplicate the communication machinery and the two local blocks */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}
2212 
2213 #include "sys.h"
2214 
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a square matrix from a PETSc binary viewer into
   a new MATMPIBAIJ matrix.

   Process 0 reads the header, row lengths, column indices and values and
   ships each process its portion with MPI point-to-point messages.  If
   the global row count is not divisible by the block size (option
   -matload_block_size), the matrix is padded with extra rows/columns that
   carry a unit diagonal.
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i, nz, ierr, j,rstart, rend, fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
  int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);

  /* only process 0 touches the file; header fields are broadcast below */
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows; rowners[] is in block rows,
     browners[] the same ranges scaled to point rows */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
  for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    /* each padding row holds a single (unit diagonal) entry */
    for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* on one process the padding entries are synthesized, not read */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if ( size != 1 ) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += locrowlens[i];
    }
    ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] flags block columns already seen in the current block row so
     each is counted once, and masked1/masked2 remember which flags to
     clear before the next block row */
  dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
  PetscMemzero(mask,3*Mbs*sizeof(int));
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for ( i=0; i<mbs; i++ ) {
    dcount  = 0;
    odcount = 0;
    for ( j=0; j<bs; j++ ) {
      kmax = locrowlens[rowcount];
      for ( k=0; k<kmax; k++ ) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* block columns inside our (square-matrix) ownership range are
             diagonal-block entries; the rest are off-diagonal */
          if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
    for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
         CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for ( i=1; i<size-1; i++ ) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc; note i == size-1 here, left over from the loop above */
    if ( size != 1 ){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      /* padding rows get a unit diagonal */
      for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for ( i=0; i<m; i++ ) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  PetscFree(locrowlens);
  PetscFree(buf);
  PetscFree(ibuf);
  PetscFree(rowners);
  PetscFree(dlens);
  PetscFree(mask);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2442 
2443 
2444 
2445 #undef __FUNC__
2446 #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2447 /*@
2448    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2449 
2450    Input Parameters:
2451 .  mat  - the matrix
2452 .  fact - factor
2453 
2454    Collective on Mat
2455 
2456   Notes:
2457    This can also be set by the command line option: -mat_use_hash_table fact
2458 
2459 .keywords: matrix, hashtable, factor, HT
2460 
2461 .seealso: MatSetOption()
2462 @*/
2463 int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2464 {
2465   Mat_MPIBAIJ *baij;
2466 
2467   PetscFunctionBegin;
2468   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2469   if (mat->type != MATMPIBAIJ) {
2470       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2471   }
2472   baij = (Mat_MPIBAIJ*) mat->data;
2473   baij->ht_fact = fact;
2474   PetscFunctionReturn(0);
2475 }
2476