xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b1fc97648b54ed25ceb8d2b548e0bfd84a6d6c06)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*b1fc9764SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.141 1999/01/08 18:15:39 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
133b2fbd54SBarry Smith 
14537820f0SBarry Smith /*
15537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
1857b952d6SSatish Balay    length of colmap equals the global matrix length.
1957b952d6SSatish Balay */
205615d1e5SSatish Balay #undef __FUNC__
215615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2357b952d6SSatish Balay {
2457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
2648e59246SSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
2757b952d6SSatish Balay 
28d64ed03dSBarry Smith   PetscFunctionBegin;
2948e59246SSatish Balay #if defined (USE_CTABLE)
3048e59246SSatish Balay   ierr = TableCreate( &baij->colmap, baij->nbs/5 ); CHKERRQ(ierr);
3148e59246SSatish Balay   for ( i=0; i<nbs; i++ ){
3248e59246SSatish Balay     ierr = TableAdd( baij->colmap, baij->garray[i] + 1, i*bs+1 );CHKERRQ(ierr);
3348e59246SSatish Balay   }
3448e59246SSatish Balay #else
35758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3657b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3757b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
38928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
3948e59246SSatish Balay #endif
403a40ed3dSBarry Smith   PetscFunctionReturn(0);
4157b952d6SSatish Balay }
4257b952d6SSatish Balay 
4380c1aa95SSatish Balay #define CHUNKSIZE  10
4480c1aa95SSatish Balay 
45f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
4680c1aa95SSatish Balay { \
4780c1aa95SSatish Balay  \
4880c1aa95SSatish Balay     brow = row/bs;  \
4980c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
50ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
5180c1aa95SSatish Balay       bcol = col/bs; \
5280c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
53ab26458aSBarry Smith       low = 0; high = nrow; \
54ab26458aSBarry Smith       while (high-low > 3) { \
55ab26458aSBarry Smith         t = (low+high)/2; \
56ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
57ab26458aSBarry Smith         else              low  = t; \
58ab26458aSBarry Smith       } \
59ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
6080c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
6180c1aa95SSatish Balay         if (rp[_i] == bcol) { \
6280c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
63eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
64eada6651SSatish Balay           else                    *bap  = value;  \
65ac7a638eSSatish Balay           goto a_noinsert; \
6680c1aa95SSatish Balay         } \
6780c1aa95SSatish Balay       } \
6889280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
69a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
7080c1aa95SSatish Balay       if (nrow >= rmax) { \
7180c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
7280c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
7380c1aa95SSatish Balay         Scalar *new_a; \
7480c1aa95SSatish Balay  \
75a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
7689280ab3SLois Curfman McInnes  \
7780c1aa95SSatish Balay         /* malloc new storage space */ \
7880c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
7980c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
8080c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
8180c1aa95SSatish Balay         new_i   = new_j + new_nz; \
8280c1aa95SSatish Balay  \
8380c1aa95SSatish Balay         /* copy over old data into new slots */ \
8480c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
8580c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
8680c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
8780c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
8880c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
8980c1aa95SSatish Balay                                                            len*sizeof(int)); \
9080c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
9180c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
9280c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
9380c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
9480c1aa95SSatish Balay         /* free up old matrix storage */ \
9580c1aa95SSatish Balay         PetscFree(a->a);  \
9680c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
9780c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
9880c1aa95SSatish Balay         a->singlemalloc = 1; \
9980c1aa95SSatish Balay  \
10080c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
101ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
10280c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
10380c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
10480c1aa95SSatish Balay         a->reallocs++; \
10580c1aa95SSatish Balay         a->nz++; \
10680c1aa95SSatish Balay       } \
10780c1aa95SSatish Balay       N = nrow++ - 1;  \
10880c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
10980c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
11080c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
11180c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
11280c1aa95SSatish Balay       } \
11380c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
11480c1aa95SSatish Balay       rp[_i]                      = bcol;  \
11580c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
116ac7a638eSSatish Balay       a_noinsert:; \
11780c1aa95SSatish Balay     ailen[brow] = nrow; \
11880c1aa95SSatish Balay }
11957b952d6SSatish Balay 
120ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
121ac7a638eSSatish Balay { \
122ac7a638eSSatish Balay  \
123ac7a638eSSatish Balay     brow = row/bs;  \
124ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
125ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
126ac7a638eSSatish Balay       bcol = col/bs; \
127ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
128ac7a638eSSatish Balay       low = 0; high = nrow; \
129ac7a638eSSatish Balay       while (high-low > 3) { \
130ac7a638eSSatish Balay         t = (low+high)/2; \
131ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
132ac7a638eSSatish Balay         else              low  = t; \
133ac7a638eSSatish Balay       } \
134ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
135ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
136ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
137ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
138ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
139ac7a638eSSatish Balay           else                    *bap  = value;  \
140ac7a638eSSatish Balay           goto b_noinsert; \
141ac7a638eSSatish Balay         } \
142ac7a638eSSatish Balay       } \
14389280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
144a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
145ac7a638eSSatish Balay       if (nrow >= rmax) { \
146ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14774c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
148ac7a638eSSatish Balay         Scalar *new_a; \
149ac7a638eSSatish Balay  \
150a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
15189280ab3SLois Curfman McInnes  \
152ac7a638eSSatish Balay         /* malloc new storage space */ \
15374c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
154ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
155ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
156ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
157ac7a638eSSatish Balay  \
158ac7a638eSSatish Balay         /* copy over old data into new slots */ \
159ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
16074c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
161ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
162ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
163ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
164ac7a638eSSatish Balay                                                            len*sizeof(int)); \
165ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
166ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
167ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
168ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
169ac7a638eSSatish Balay         /* free up old matrix storage */ \
17074c639caSSatish Balay         PetscFree(b->a);  \
17174c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
17274c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
17374c639caSSatish Balay         b->singlemalloc = 1; \
174ac7a638eSSatish Balay  \
175ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
176ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
17774c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
17874c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
17974c639caSSatish Balay         b->reallocs++; \
18074c639caSSatish Balay         b->nz++; \
181ac7a638eSSatish Balay       } \
182ac7a638eSSatish Balay       N = nrow++ - 1;  \
183ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
184ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
185ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
186ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
187ac7a638eSSatish Balay       } \
188ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
189ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
190ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
191ac7a638eSSatish Balay       b_noinsert:; \
192ac7a638eSSatish Balay     bilen[brow] = nrow; \
193ac7a638eSSatish Balay }
194ac7a638eSSatish Balay 
1955615d1e5SSatish Balay #undef __FUNC__
1965615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
197ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
19857b952d6SSatish Balay {
19957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
20057b952d6SSatish Balay   Scalar      value;
2014fa0d573SSatish Balay   int         ierr,i,j,row,col;
2024fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2034fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2044fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
20557b952d6SSatish Balay 
206eada6651SSatish Balay   /* Some Variables required in the macro */
20780c1aa95SSatish Balay   Mat         A = baij->A;
20880c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
209ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
210ac7a638eSSatish Balay   Scalar      *aa=a->a;
211ac7a638eSSatish Balay 
212ac7a638eSSatish Balay   Mat         B = baij->B;
213ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
214ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
215ac7a638eSSatish Balay   Scalar      *ba=b->a;
216ac7a638eSSatish Balay 
217ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
218ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
219ac7a638eSSatish Balay   Scalar      *ap,*bap;
22080c1aa95SSatish Balay 
221d64ed03dSBarry Smith   PetscFunctionBegin;
22257b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2233a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
224a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
225a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
226639f9d9dSBarry Smith #endif
22757b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
22857b952d6SSatish Balay       row = im[i] - rstart_orig;
22957b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
23057b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
23157b952d6SSatish Balay           col = in[j] - cstart_orig;
23257b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
233f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
23480c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
23557b952d6SSatish Balay         }
2363a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
237a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
238a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
239639f9d9dSBarry Smith #endif
24057b952d6SSatish Balay         else {
24157b952d6SSatish Balay           if (mat->was_assembled) {
242905e6a2fSBarry Smith             if (!baij->colmap) {
243905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
244905e6a2fSBarry Smith             }
24548e59246SSatish Balay #if defined (USE_CTABLE)
24648e59246SSatish Balay 	    col = TableFind( baij->colmap, in[j]/bs + 1 ) - 1 + in[j]%bs;
24748e59246SSatish Balay #else
248905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
24948e59246SSatish Balay #endif
25057b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
25157b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
25257b952d6SSatish Balay               col =  in[j];
2539bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2549bf004c3SSatish Balay               B = baij->B;
2559bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2569bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2579bf004c3SSatish Balay               ba=b->a;
25857b952d6SSatish Balay             }
259d64ed03dSBarry Smith           } else col = in[j];
26057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
262ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
26357b952d6SSatish Balay         }
26457b952d6SSatish Balay       }
265d64ed03dSBarry Smith     } else {
26690f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
26757b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
268d64ed03dSBarry Smith       } else {
26990f02eecSBarry Smith         if (!baij->donotstash) {
27057b952d6SSatish Balay           row = im[i];
27157b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
27257b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
27357b952d6SSatish Balay           }
27457b952d6SSatish Balay         }
27557b952d6SSatish Balay       }
27657b952d6SSatish Balay     }
27790f02eecSBarry Smith   }
2783a40ed3dSBarry Smith   PetscFunctionReturn(0);
27957b952d6SSatish Balay }
28057b952d6SSatish Balay 
281ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
282ab26458aSBarry Smith #undef __FUNC__
283ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
284ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
285ab26458aSBarry Smith {
286ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
28730793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
288abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
289ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
290ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
291ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
292ab26458aSBarry Smith 
29330793edcSSatish Balay   if(!barray) {
29447513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
29530793edcSSatish Balay   }
29630793edcSSatish Balay 
297ab26458aSBarry Smith   if (roworiented) {
298ab26458aSBarry Smith     stepval = (n-1)*bs;
299ab26458aSBarry Smith   } else {
300ab26458aSBarry Smith     stepval = (m-1)*bs;
301ab26458aSBarry Smith   }
302ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
3033a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
304a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
305a8c6a408SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
306ab26458aSBarry Smith #endif
307ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
308ab26458aSBarry Smith       row = im[i] - rstart;
309ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
31015b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
31115b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
31215b57d14SSatish Balay           barray = v + i*bs2;
31315b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
31415b57d14SSatish Balay           barray = v + j*bs2;
31515b57d14SSatish Balay         } else { /* Here a copy is required */
316ab26458aSBarry Smith           if (roworiented) {
317ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
318ab26458aSBarry Smith           } else {
319ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
320abef11f7SSatish Balay           }
32147513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
32247513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
32330793edcSSatish Balay               *barray++  = *value++;
32447513183SBarry Smith             }
32547513183SBarry Smith           }
32630793edcSSatish Balay           barray -=bs2;
32715b57d14SSatish Balay         }
328abef11f7SSatish Balay 
329abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
330abef11f7SSatish Balay           col  = in[j] - cstart;
33130793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
332ab26458aSBarry Smith         }
3333a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
334a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
335a8c6a408SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
336ab26458aSBarry Smith #endif
337ab26458aSBarry Smith         else {
338ab26458aSBarry Smith           if (mat->was_assembled) {
339ab26458aSBarry Smith             if (!baij->colmap) {
340ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
341ab26458aSBarry Smith             }
342a5eb4965SSatish Balay 
3433a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
34448e59246SSatish Balay #if defined (USE_CTABLE)
34548e59246SSatish Balay             if( (TableFind( baij->colmap, in[j] + 1 ) - 1) % bs)
34648e59246SSatish Balay 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
34748e59246SSatish Balay #else
348a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
349a5eb4965SSatish Balay #endif
35048e59246SSatish Balay #endif
35148e59246SSatish Balay #if defined (USE_CTABLE)
35248e59246SSatish Balay 	    col = (TableFind( baij->colmap, in[j] + 1 ) - 1)/bs;
35348e59246SSatish Balay #else
354a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
35548e59246SSatish Balay #endif
356ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
357ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
358ab26458aSBarry Smith               col =  in[j];
359ab26458aSBarry Smith             }
360ab26458aSBarry Smith           }
361ab26458aSBarry Smith           else col = in[j];
36230793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363ab26458aSBarry Smith         }
364ab26458aSBarry Smith       }
365d64ed03dSBarry Smith     } else {
366ab26458aSBarry Smith       if (!baij->donotstash) {
367ab26458aSBarry Smith         if (roworiented ) {
368abef11f7SSatish Balay           row   = im[i]*bs;
369abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
370abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
371abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
372abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
373abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
374abef11f7SSatish Balay               }
375ab26458aSBarry Smith             }
376ab26458aSBarry Smith           }
377d64ed03dSBarry Smith         } else {
378ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
379abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
380abef11f7SSatish Balay             col   = in[j]*bs;
381abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
382abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
383abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
384ab26458aSBarry Smith               }
385ab26458aSBarry Smith             }
386ab26458aSBarry Smith           }
387abef11f7SSatish Balay         }
388abef11f7SSatish Balay       }
389ab26458aSBarry Smith     }
390ab26458aSBarry Smith   }
3913a40ed3dSBarry Smith   PetscFunctionReturn(0);
392ab26458aSBarry Smith }
3930bdbc534SSatish Balay #define HASH_KEY 0.6180339887
394c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
395c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
396c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
3975615d1e5SSatish Balay #undef __FUNC__
3980bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
3990bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4000bdbc534SSatish Balay {
4010bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4020bdbc534SSatish Balay   int         ierr,i,j,row,col;
4030bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
404c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
405c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
406c2760754SSatish Balay   double      tmp;
407b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,value;
4084a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
4094a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4104a15367fSSatish Balay #endif
4110bdbc534SSatish Balay 
4120bdbc534SSatish Balay   PetscFunctionBegin;
4130bdbc534SSatish Balay 
4140bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
4150bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
4160bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4170bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4180bdbc534SSatish Balay #endif
4190bdbc534SSatish Balay       row = im[i];
420c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4210bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4220bdbc534SSatish Balay         col = in[j];
4230bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4240bdbc534SSatish Balay         /* Look up into the Hash Table */
425c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
426c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4270bdbc534SSatish Balay 
428c2760754SSatish Balay 
429c2760754SSatish Balay         idx = h1;
430187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
431187ce0cbSSatish Balay         insert_ct++;
432187ce0cbSSatish Balay         total_ct++;
433187ce0cbSSatish Balay         if (HT[idx] != key) {
434187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
435187ce0cbSSatish Balay           if (idx == size) {
436187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
437187ce0cbSSatish Balay             if (idx == h1) {
438187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
439187ce0cbSSatish Balay             }
440187ce0cbSSatish Balay           }
441187ce0cbSSatish Balay         }
442187ce0cbSSatish Balay #else
443c2760754SSatish Balay         if (HT[idx] != key) {
444c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
445c2760754SSatish Balay           if (idx == size) {
446c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
447c2760754SSatish Balay             if (idx == h1) {
448c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
449c2760754SSatish Balay             }
450c2760754SSatish Balay           }
451c2760754SSatish Balay         }
452187ce0cbSSatish Balay #endif
453c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
454c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
455c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4560bdbc534SSatish Balay       }
4570bdbc534SSatish Balay     } else {
4580bdbc534SSatish Balay       if (roworiented && !baij->donotstash) {
4590bdbc534SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
4600bdbc534SSatish Balay       } else {
4610bdbc534SSatish Balay         if (!baij->donotstash) {
4620bdbc534SSatish Balay           row = im[i];
4630bdbc534SSatish Balay 	  for ( j=0; j<n; j++ ) {
4640bdbc534SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
4650bdbc534SSatish Balay           }
4660bdbc534SSatish Balay         }
4670bdbc534SSatish Balay       }
4680bdbc534SSatish Balay     }
4690bdbc534SSatish Balay   }
470187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
471187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
472187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
473187ce0cbSSatish Balay #endif
4740bdbc534SSatish Balay   PetscFunctionReturn(0);
4750bdbc534SSatish Balay }
4760bdbc534SSatish Balay 
4770bdbc534SSatish Balay #undef __FUNC__
4780bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4790bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4800bdbc534SSatish Balay {
4810bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4820bdbc534SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
4830bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
484b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
485c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
486c2760754SSatish Balay   double      tmp;
487187ce0cbSSatish Balay   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
4884a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
4894a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4904a15367fSSatish Balay #endif
4910bdbc534SSatish Balay 
492d0a41580SSatish Balay   PetscFunctionBegin;
493d0a41580SSatish Balay 
4940bdbc534SSatish Balay   if (roworiented) {
4950bdbc534SSatish Balay     stepval = (n-1)*bs;
4960bdbc534SSatish Balay   } else {
4970bdbc534SSatish Balay     stepval = (m-1)*bs;
4980bdbc534SSatish Balay   }
4990bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
5000bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
5010bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
5020bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
5030bdbc534SSatish Balay #endif
5040bdbc534SSatish Balay     row   = im[i];
505187ce0cbSSatish Balay     v_t   = v + i*bs2;
506c2760754SSatish Balay     if (row >= rstart && row < rend) {
5070bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
5080bdbc534SSatish Balay         col = in[j];
5090bdbc534SSatish Balay 
5100bdbc534SSatish Balay         /* Look up into the Hash Table */
511c2760754SSatish Balay         key = row*Nbs+col+1;
512c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5130bdbc534SSatish Balay 
514c2760754SSatish Balay         idx = h1;
515187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
516187ce0cbSSatish Balay         total_ct++;
517187ce0cbSSatish Balay         insert_ct++;
518187ce0cbSSatish Balay        if (HT[idx] != key) {
519187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
520187ce0cbSSatish Balay           if (idx == size) {
521187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
522187ce0cbSSatish Balay             if (idx == h1) {
523187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
524187ce0cbSSatish Balay             }
525187ce0cbSSatish Balay           }
526187ce0cbSSatish Balay         }
527187ce0cbSSatish Balay #else
528c2760754SSatish Balay         if (HT[idx] != key) {
529c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
530c2760754SSatish Balay           if (idx == size) {
531c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
532c2760754SSatish Balay             if (idx == h1) {
533c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
534c2760754SSatish Balay             }
535c2760754SSatish Balay           }
536c2760754SSatish Balay         }
537187ce0cbSSatish Balay #endif
538c2760754SSatish Balay         baij_a = HD[idx];
5390bdbc534SSatish Balay         if (roworiented) {
540c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
541187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
542187ce0cbSSatish Balay           value = v_t;
543187ce0cbSSatish Balay           v_t  += bs;
544fef45726SSatish Balay           if (addv == ADD_VALUES) {
545c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
546c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
547fef45726SSatish Balay                 baij_a[jj]  += *value++;
548b4cc0f5aSSatish Balay               }
549b4cc0f5aSSatish Balay             }
550fef45726SSatish Balay           } else {
551c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
552c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
553fef45726SSatish Balay                 baij_a[jj]  = *value++;
554fef45726SSatish Balay               }
555fef45726SSatish Balay             }
556fef45726SSatish Balay           }
5570bdbc534SSatish Balay         } else {
5580bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
559fef45726SSatish Balay           if (addv == ADD_VALUES) {
560b4cc0f5aSSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
5610bdbc534SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
562fef45726SSatish Balay                 baij_a[jj]  += *value++;
563fef45726SSatish Balay               }
564fef45726SSatish Balay             }
565fef45726SSatish Balay           } else {
566fef45726SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
567fef45726SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
568fef45726SSatish Balay                 baij_a[jj]  = *value++;
569fef45726SSatish Balay               }
570b4cc0f5aSSatish Balay             }
5710bdbc534SSatish Balay           }
5720bdbc534SSatish Balay         }
5730bdbc534SSatish Balay       }
5740bdbc534SSatish Balay     } else {
5750bdbc534SSatish Balay       if (!baij->donotstash) {
5760bdbc534SSatish Balay         if (roworiented ) {
5770bdbc534SSatish Balay           row   = im[i]*bs;
5780bdbc534SSatish Balay           value = v + i*(stepval+bs)*bs;
5790bdbc534SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
5800bdbc534SSatish Balay             for ( k=0; k<n; k++ ) {
5810bdbc534SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
5820bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5830bdbc534SSatish Balay               }
5840bdbc534SSatish Balay             }
5850bdbc534SSatish Balay           }
5860bdbc534SSatish Balay         } else {
5870bdbc534SSatish Balay           for ( j=0; j<n; j++ ) {
5880bdbc534SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
5890bdbc534SSatish Balay             col   = in[j]*bs;
5900bdbc534SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
5910bdbc534SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
5920bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5930bdbc534SSatish Balay               }
5940bdbc534SSatish Balay             }
5950bdbc534SSatish Balay           }
5960bdbc534SSatish Balay         }
5970bdbc534SSatish Balay       }
5980bdbc534SSatish Balay     }
5990bdbc534SSatish Balay   }
600187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
601187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
602187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
603187ce0cbSSatish Balay #endif
6040bdbc534SSatish Balay   PetscFunctionReturn(0);
6050bdbc534SSatish Balay }
606133cdb44SSatish Balay 
6070bdbc534SSatish Balay #undef __FUNC__
6085615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
609ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
610d6de1c52SSatish Balay {
611d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
612d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
61348e59246SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
614d6de1c52SSatish Balay 
615133cdb44SSatish Balay   PetscFunctionBegin;
616d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
617a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
618a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
619d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
620d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
621d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
622a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
623a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
624d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
625d6de1c52SSatish Balay           col = idxn[j] - bscstart;
62698dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
627d64ed03dSBarry Smith         } else {
628905e6a2fSBarry Smith           if (!baij->colmap) {
629905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
630905e6a2fSBarry Smith           }
63148e59246SSatish Balay #if defined (USE_CTABLE)
63248e59246SSatish Balay           data = TableFind( baij->colmap, idxn[j]/bs + 1 ) - 1;
63348e59246SSatish Balay #else
63448e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
63548e59246SSatish Balay #endif
63648e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
637d9d09a02SSatish Balay           else {
63848e59246SSatish Balay             col  = data + idxn[j]%bs;
63998dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
640d6de1c52SSatish Balay           }
641d6de1c52SSatish Balay         }
642d6de1c52SSatish Balay       }
643d64ed03dSBarry Smith     } else {
644a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
645d6de1c52SSatish Balay     }
646d6de1c52SSatish Balay   }
6473a40ed3dSBarry Smith  PetscFunctionReturn(0);
648d6de1c52SSatish Balay }
649d6de1c52SSatish Balay 
6505615d1e5SSatish Balay #undef __FUNC__
6515615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
652ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
653d6de1c52SSatish Balay {
654d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
655d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
656acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
657d6de1c52SSatish Balay   double     sum = 0.0;
658d6de1c52SSatish Balay   Scalar     *v;
659d6de1c52SSatish Balay 
660d64ed03dSBarry Smith   PetscFunctionBegin;
661d6de1c52SSatish Balay   if (baij->size == 1) {
662d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
663d6de1c52SSatish Balay   } else {
664d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
665d6de1c52SSatish Balay       v = amat->a;
666d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
6673a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
668e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
669d6de1c52SSatish Balay #else
670d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
671d6de1c52SSatish Balay #endif
672d6de1c52SSatish Balay       }
673d6de1c52SSatish Balay       v = bmat->a;
674d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
6753a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
676e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
677d6de1c52SSatish Balay #else
678d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
679d6de1c52SSatish Balay #endif
680d6de1c52SSatish Balay       }
681ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
682d6de1c52SSatish Balay       *norm = sqrt(*norm);
683d64ed03dSBarry Smith     } else {
684e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
685d6de1c52SSatish Balay     }
686d64ed03dSBarry Smith   }
6873a40ed3dSBarry Smith   PetscFunctionReturn(0);
688d6de1c52SSatish Balay }
68957b952d6SSatish Balay 
6905615d1e5SSatish Balay #undef __FUNC__
6915615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
692ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
69357b952d6SSatish Balay {
69457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
69557b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
69657b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
69757b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
69857b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
69957b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
70057b952d6SSatish Balay   InsertMode  addv;
70157b952d6SSatish Balay   Scalar      *rvalues,*svalues;
70257b952d6SSatish Balay 
703d64ed03dSBarry Smith   PetscFunctionBegin;
704570da906SBarry Smith   if (baij->donotstash) {
705570da906SBarry Smith     baij->svalues    = 0; baij->rvalues    = 0;
706570da906SBarry Smith     baij->nsends     = 0; baij->nrecvs     = 0;
707570da906SBarry Smith     baij->send_waits = 0; baij->recv_waits = 0;
708570da906SBarry Smith     baij->rmax       = 0;
709570da906SBarry Smith     PetscFunctionReturn(0);
710570da906SBarry Smith   }
711570da906SBarry Smith 
71257b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
713ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
71457b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
715a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
71657b952d6SSatish Balay   }
717e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
71857b952d6SSatish Balay 
71957b952d6SSatish Balay   /*  first count number of contributors to each processor */
72057b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
72157b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
72257b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
72357b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
72457b952d6SSatish Balay     idx = baij->stash.idx[i];
72557b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
72657b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
72757b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
72857b952d6SSatish Balay       }
72957b952d6SSatish Balay     }
73057b952d6SSatish Balay   }
73157b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
73257b952d6SSatish Balay 
73357b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
73457b952d6SSatish Balay   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
735ca161407SBarry Smith   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
73657b952d6SSatish Balay   nreceives = work[rank];
737ca161407SBarry Smith   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
73857b952d6SSatish Balay   nmax      = work[rank];
73957b952d6SSatish Balay   PetscFree(work);
74057b952d6SSatish Balay 
74157b952d6SSatish Balay   /* post receives:
74257b952d6SSatish Balay        1) each message will consist of ordered pairs
74357b952d6SSatish Balay      (global index,value) we store the global index as a double
74457b952d6SSatish Balay      to simplify the message passing.
74557b952d6SSatish Balay        2) since we don't know how long each individual message is we
74657b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
74757b952d6SSatish Balay      this is a lot of wasted space.
74857b952d6SSatish Balay 
74957b952d6SSatish Balay 
75057b952d6SSatish Balay        This could be done better.
75157b952d6SSatish Balay   */
752f8abc2e8SBarry Smith   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
753f8abc2e8SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
75457b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
755ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
75657b952d6SSatish Balay   }
75757b952d6SSatish Balay 
75857b952d6SSatish Balay   /* do sends:
75957b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
76057b952d6SSatish Balay          the ith processor
76157b952d6SSatish Balay   */
76257b952d6SSatish Balay   svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
763d64ed03dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
76457b952d6SSatish Balay   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
76557b952d6SSatish Balay   starts[0] = 0;
76657b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
76757b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
76857b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
76957b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
77057b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
77157b952d6SSatish Balay   }
77257b952d6SSatish Balay   PetscFree(owner);
77357b952d6SSatish Balay   starts[0] = 0;
77457b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
77557b952d6SSatish Balay   count = 0;
77657b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
77757b952d6SSatish Balay     if (procs[i]) {
778ca161407SBarry Smith       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
77957b952d6SSatish Balay     }
78057b952d6SSatish Balay   }
78157b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
78257b952d6SSatish Balay 
78357b952d6SSatish Balay   /* Free cache space */
78410a665d1SBarry Smith   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
78557b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
78657b952d6SSatish Balay 
78757b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
78857b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
78957b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
79057b952d6SSatish Balay   baij->rmax       = nmax;
79157b952d6SSatish Balay 
7923a40ed3dSBarry Smith   PetscFunctionReturn(0);
79357b952d6SSatish Balay }
794bd7f49f5SSatish Balay 
795fef45726SSatish Balay /*
796fef45726SSatish Balay   Creates the hash table, and sets the table
797fef45726SSatish Balay   This table is created only once.
798fef45726SSatish Balay   If new entried need to be added to the matrix
799fef45726SSatish Balay   then the hash table has to be destroyed and
800fef45726SSatish Balay   recreated.
801fef45726SSatish Balay */
802fef45726SSatish Balay #undef __FUNC__
803fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
804d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
805596b8d2eSBarry Smith {
806596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
807596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
808596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
8090bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
8104a15367fSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart;
811187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
812fef45726SSatish Balay   int         *HT,key;
8130bdbc534SSatish Balay   Scalar      **HD;
814c2760754SSatish Balay   double      tmp;
8154a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
8164a15367fSSatish Balay   int         ct=0,max=0;
8174a15367fSSatish Balay #endif
818fef45726SSatish Balay 
819d64ed03dSBarry Smith   PetscFunctionBegin;
8200bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8210bdbc534SSatish Balay   size = baij->ht_size;
822fef45726SSatish Balay 
8230bdbc534SSatish Balay   if (baij->ht) {
8240bdbc534SSatish Balay     PetscFunctionReturn(0);
825596b8d2eSBarry Smith   }
8260bdbc534SSatish Balay 
827fef45726SSatish Balay   /* Allocate Memory for Hash Table */
828b9e4cc15SSatish Balay   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
829b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
830b9e4cc15SSatish Balay   HD = baij->hd;
831a07cd24cSSatish Balay   HT = baij->ht;
832b9e4cc15SSatish Balay 
833b9e4cc15SSatish Balay 
834c2760754SSatish Balay   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
8350bdbc534SSatish Balay 
836596b8d2eSBarry Smith 
837596b8d2eSBarry Smith   /* Loop Over A */
8380bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
839596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8400bdbc534SSatish Balay       row = i+rstart;
8410bdbc534SSatish Balay       col = aj[j]+cstart;
842596b8d2eSBarry Smith 
843187ce0cbSSatish Balay       key = row*Nbs + col + 1;
844c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8450bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
8460bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8470bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8480bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
849596b8d2eSBarry Smith           break;
850187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
851187ce0cbSSatish Balay         } else {
852187ce0cbSSatish Balay           ct++;
853187ce0cbSSatish Balay #endif
854596b8d2eSBarry Smith         }
855187ce0cbSSatish Balay       }
856187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
857187ce0cbSSatish Balay       if (k> max) max = k;
858187ce0cbSSatish Balay #endif
859596b8d2eSBarry Smith     }
860596b8d2eSBarry Smith   }
861596b8d2eSBarry Smith   /* Loop Over B */
8620bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
863596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
8640bdbc534SSatish Balay       row = i+rstart;
8650bdbc534SSatish Balay       col = garray[bj[j]];
866187ce0cbSSatish Balay       key = row*Nbs + col + 1;
867c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8680bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
8690bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8700bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8710bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
872596b8d2eSBarry Smith           break;
873187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
874187ce0cbSSatish Balay         } else {
875187ce0cbSSatish Balay           ct++;
876187ce0cbSSatish Balay #endif
877596b8d2eSBarry Smith         }
878187ce0cbSSatish Balay       }
879187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
880187ce0cbSSatish Balay       if (k> max) max = k;
881187ce0cbSSatish Balay #endif
882596b8d2eSBarry Smith     }
883596b8d2eSBarry Smith   }
884596b8d2eSBarry Smith 
885596b8d2eSBarry Smith   /* Print Summary */
886187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
887c2760754SSatish Balay   for ( i=0,j=0; i<size; i++)
888596b8d2eSBarry Smith     if (HT[i]) {j++;}
889187ce0cbSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
890187ce0cbSSatish Balay            (j== 0)? 0.0:((double)(ct+j))/j,max);
891187ce0cbSSatish Balay #endif
8923a40ed3dSBarry Smith   PetscFunctionReturn(0);
893596b8d2eSBarry Smith }
89457b952d6SSatish Balay 
8955615d1e5SSatish Balay #undef __FUNC__
8965615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
897ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
89857b952d6SSatish Balay {
89957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
90057b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
90157b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
902b7029e64SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
90357b952d6SSatish Balay   Scalar      *values,val;
904e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
90557b952d6SSatish Balay 
906d64ed03dSBarry Smith   PetscFunctionBegin;
90757b952d6SSatish Balay   /*  wait on receives */
90857b952d6SSatish Balay   while (count) {
909ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
91057b952d6SSatish Balay     /* unpack receives into our local space */
91157b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
912ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
91357b952d6SSatish Balay     n = n/3;
91457b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
91557b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
91657b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
91757b952d6SSatish Balay       val = values[3*i+2];
91857b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
91957b952d6SSatish Balay         col -= baij->cstart*bs;
9206fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
921d64ed03dSBarry Smith       } else {
92257b952d6SSatish Balay         if (mat->was_assembled) {
923905e6a2fSBarry Smith           if (!baij->colmap) {
924905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
925905e6a2fSBarry Smith           }
92648e59246SSatish Balay #if defined (USE_CTABLE)
92748e59246SSatish Balay 	  col = TableFind( baij->colmap, col/bs + 1 ) - 1 + col%bs;
92848e59246SSatish Balay #else
929a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
93048e59246SSatish Balay #endif
93157b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
93257b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
93357b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
93457b952d6SSatish Balay           }
93557b952d6SSatish Balay         }
9366fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
93757b952d6SSatish Balay       }
93857b952d6SSatish Balay     }
93957b952d6SSatish Balay     count--;
94057b952d6SSatish Balay   }
941570da906SBarry Smith   if (baij->recv_waits) PetscFree(baij->recv_waits);
942570da906SBarry Smith   if (baij->rvalues)    PetscFree(baij->rvalues);
94357b952d6SSatish Balay 
94457b952d6SSatish Balay   /* wait on sends */
94557b952d6SSatish Balay   if (baij->nsends) {
946d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
947ca161407SBarry Smith     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
94857b952d6SSatish Balay     PetscFree(send_status);
94957b952d6SSatish Balay   }
950570da906SBarry Smith   if (baij->send_waits) PetscFree(baij->send_waits);
951570da906SBarry Smith   if (baij->svalues)    PetscFree(baij->svalues);
95257b952d6SSatish Balay 
95357b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
95457b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
95557b952d6SSatish Balay 
95657b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
95757b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
9586e713f22SBarry Smith   /*
9596e713f22SBarry Smith      if nonzero structure of submatrix B cannot change then we know that
9606e713f22SBarry Smith      no processor disassembled thus we can skip this stuff
9616e713f22SBarry Smith   */
9626e713f22SBarry Smith   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
963ca161407SBarry Smith     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
96457b952d6SSatish Balay     if (mat->was_assembled && !other_disassembled) {
96557b952d6SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
96657b952d6SSatish Balay     }
9676e713f22SBarry Smith   }
96857b952d6SSatish Balay 
9696d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
97057b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
97157b952d6SSatish Balay   }
97257b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
97357b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
97457b952d6SSatish Balay 
975187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
976187ce0cbSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
977187ce0cbSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
978187ce0cbSSatish Balay              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
979187ce0cbSSatish Balay     baij->ht_total_ct  = 0;
980187ce0cbSSatish Balay     baij->ht_insert_ct = 0;
981187ce0cbSSatish Balay   }
982187ce0cbSSatish Balay #endif
983133cdb44SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
984133cdb44SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
985f830108cSBarry Smith     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
986f830108cSBarry Smith     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
987bd7f49f5SSatish Balay   }
988187ce0cbSSatish Balay 
98957b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
9903a40ed3dSBarry Smith   PetscFunctionReturn(0);
99157b952d6SSatish Balay }
99257b952d6SSatish Balay 
9935615d1e5SSatish Balay #undef __FUNC__
9945615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
99557b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
99657b952d6SSatish Balay {
99757b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
99857b952d6SSatish Balay   int          ierr;
99957b952d6SSatish Balay 
1000d64ed03dSBarry Smith   PetscFunctionBegin;
100157b952d6SSatish Balay   if (baij->size == 1) {
100257b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1003a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
10043a40ed3dSBarry Smith   PetscFunctionReturn(0);
100557b952d6SSatish Balay }
100657b952d6SSatish Balay 
10075615d1e5SSatish Balay #undef __FUNC__
10085615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
100957b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
101057b952d6SSatish Balay {
101157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
101277ed5343SBarry Smith   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
101357b952d6SSatish Balay   FILE         *fd;
101457b952d6SSatish Balay   ViewerType   vtype;
101557b952d6SSatish Balay 
1016d64ed03dSBarry Smith   PetscFunctionBegin;
101757b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
10183f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
101957b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
1020639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
10214e220ebcSLois Curfman McInnes       MatInfo info;
102257b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
102357b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
10244e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
102557b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
102657b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
10274e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10284e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
10294e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
10304e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
10314e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
10324e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
103357b952d6SSatish Balay       fflush(fd);
103457b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
103557b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
10363a40ed3dSBarry Smith       PetscFunctionReturn(0);
1037d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1038bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
10393a40ed3dSBarry Smith       PetscFunctionReturn(0);
104057b952d6SSatish Balay     }
104157b952d6SSatish Balay   }
104257b952d6SSatish Balay 
10433f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
104457b952d6SSatish Balay     Draw       draw;
104557b952d6SSatish Balay     PetscTruth isnull;
104677ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
10473a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
104857b952d6SSatish Balay   }
104957b952d6SSatish Balay 
105057b952d6SSatish Balay   if (size == 1) {
105157b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1052d64ed03dSBarry Smith   } else {
105357b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
105457b952d6SSatish Balay     Mat         A;
105557b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
105640011551SBarry Smith     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
105757b952d6SSatish Balay     int         mbs = baij->mbs;
105857b952d6SSatish Balay     Scalar      *a;
105957b952d6SSatish Balay 
106057b952d6SSatish Balay     if (!rank) {
106155843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1062d64ed03dSBarry Smith     } else {
106355843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
106457b952d6SSatish Balay     }
106557b952d6SSatish Balay     PLogObjectParent(mat,A);
106657b952d6SSatish Balay 
106757b952d6SSatish Balay     /* copy over the A part */
106857b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->A->data;
106957b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
107057b952d6SSatish Balay     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
107157b952d6SSatish Balay 
107257b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
107357b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
107457b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
107557b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
107657b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
107757b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1078cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1079cee3aa6bSSatish Balay           col++; a += bs;
108057b952d6SSatish Balay         }
108157b952d6SSatish Balay       }
108257b952d6SSatish Balay     }
108357b952d6SSatish Balay     /* copy over the B part */
108457b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->B->data;
108557b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
108657b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
108757b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
108857b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
108957b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
109057b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
109157b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1092cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1093cee3aa6bSSatish Balay           col++; a += bs;
109457b952d6SSatish Balay         }
109557b952d6SSatish Balay       }
109657b952d6SSatish Balay     }
109757b952d6SSatish Balay     PetscFree(rvals);
10986d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10996d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
110055843e3eSBarry Smith     /*
110155843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
110255843e3eSBarry Smith        synchronized across all processors that share the Draw object
110355843e3eSBarry Smith     */
11043f1db9ecSBarry Smith     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
110557b952d6SSatish Balay       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
110657b952d6SSatish Balay     }
110757b952d6SSatish Balay     ierr = MatDestroy(A); CHKERRQ(ierr);
110857b952d6SSatish Balay   }
11093a40ed3dSBarry Smith   PetscFunctionReturn(0);
111057b952d6SSatish Balay }
111157b952d6SSatish Balay 
111257b952d6SSatish Balay 
111357b952d6SSatish Balay 
11145615d1e5SSatish Balay #undef __FUNC__
11155615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
1116e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
111757b952d6SSatish Balay {
111857b952d6SSatish Balay   int         ierr;
111957b952d6SSatish Balay   ViewerType  vtype;
112057b952d6SSatish Balay 
1121d64ed03dSBarry Smith   PetscFunctionBegin;
112257b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
11233f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
11243f1db9ecSBarry Smith       PetscTypeCompare(vtype,MATLAB_VIEWER)) {
112557b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
11263f1db9ecSBarry Smith   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
11273a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
11285cd90555SBarry Smith   } else {
11295cd90555SBarry Smith     SETERRQ(1,1,"Viewer type not supported by PETSc object");
113057b952d6SSatish Balay   }
11313a40ed3dSBarry Smith   PetscFunctionReturn(0);
113257b952d6SSatish Balay }
113357b952d6SSatish Balay 
11345615d1e5SSatish Balay #undef __FUNC__
11355615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1136e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
113779bdfe76SSatish Balay {
113879bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
113979bdfe76SSatish Balay   int         ierr;
114079bdfe76SSatish Balay 
1141d64ed03dSBarry Smith   PetscFunctionBegin;
114298dd23e9SBarry Smith   if (--mat->refct > 0) PetscFunctionReturn(0);
114398dd23e9SBarry Smith 
114498dd23e9SBarry Smith   if (mat->mapping) {
114598dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
114698dd23e9SBarry Smith   }
114798dd23e9SBarry Smith   if (mat->bmapping) {
114898dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
114998dd23e9SBarry Smith   }
115061b13de0SBarry Smith   if (mat->rmap) {
115161b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
115261b13de0SBarry Smith   }
115361b13de0SBarry Smith   if (mat->cmap) {
115461b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
115561b13de0SBarry Smith   }
11563a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
1157e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
115879bdfe76SSatish Balay #endif
115979bdfe76SSatish Balay 
116083e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
116179bdfe76SSatish Balay   PetscFree(baij->rowners);
116279bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
116379bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
116448e59246SSatish Balay #if defined (USE_CTABLE)
116548e59246SSatish Balay   if (baij->colmap) TableDelete(baij->colmap);
116648e59246SSatish Balay #else
116779bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
116848e59246SSatish Balay #endif
116979bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
117079bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
117179bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
117279bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
117330793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
1174b9e4cc15SSatish Balay   if (baij->hd) PetscFree(baij->hd);
117579bdfe76SSatish Balay   PetscFree(baij);
117679bdfe76SSatish Balay   PLogObjectDestroy(mat);
117779bdfe76SSatish Balay   PetscHeaderDestroy(mat);
11783a40ed3dSBarry Smith   PetscFunctionReturn(0);
117979bdfe76SSatish Balay }
118079bdfe76SSatish Balay 
11815615d1e5SSatish Balay #undef __FUNC__
11825615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1183ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1184cee3aa6bSSatish Balay {
1185cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
118647b4a8eaSLois Curfman McInnes   int         ierr, nt;
1187cee3aa6bSSatish Balay 
1188d64ed03dSBarry Smith   PetscFunctionBegin;
1189e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
119047b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1191a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
119247b4a8eaSLois Curfman McInnes   }
1193e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
119447b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1195a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
119647b4a8eaSLois Curfman McInnes   }
119743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1198f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
119943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1200f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
120143a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12023a40ed3dSBarry Smith   PetscFunctionReturn(0);
1203cee3aa6bSSatish Balay }
1204cee3aa6bSSatish Balay 
12055615d1e5SSatish Balay #undef __FUNC__
12065615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1207ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1208cee3aa6bSSatish Balay {
1209cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1210cee3aa6bSSatish Balay   int        ierr;
1211d64ed03dSBarry Smith 
1212d64ed03dSBarry Smith   PetscFunctionBegin;
121343a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1214f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
121543a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1216f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
12173a40ed3dSBarry Smith   PetscFunctionReturn(0);
1218cee3aa6bSSatish Balay }
1219cee3aa6bSSatish Balay 
12205615d1e5SSatish Balay #undef __FUNC__
12215615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1222ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1223cee3aa6bSSatish Balay {
1224cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1225cee3aa6bSSatish Balay   int         ierr;
1226cee3aa6bSSatish Balay 
1227d64ed03dSBarry Smith   PetscFunctionBegin;
1228cee3aa6bSSatish Balay   /* do nondiagonal part */
1229f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1230cee3aa6bSSatish Balay   /* send it on its way */
1231537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1232cee3aa6bSSatish Balay   /* do local part */
1233f830108cSBarry Smith   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1234cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1235cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1236cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1237639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12383a40ed3dSBarry Smith   PetscFunctionReturn(0);
1239cee3aa6bSSatish Balay }
1240cee3aa6bSSatish Balay 
12415615d1e5SSatish Balay #undef __FUNC__
12425615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1243ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1244cee3aa6bSSatish Balay {
1245cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1246cee3aa6bSSatish Balay   int         ierr;
1247cee3aa6bSSatish Balay 
1248d64ed03dSBarry Smith   PetscFunctionBegin;
1249cee3aa6bSSatish Balay   /* do nondiagonal part */
1250f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1251cee3aa6bSSatish Balay   /* send it on its way */
1252537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1253cee3aa6bSSatish Balay   /* do local part */
1254f830108cSBarry Smith   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1255cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1256cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1257cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1258537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
12593a40ed3dSBarry Smith   PetscFunctionReturn(0);
1260cee3aa6bSSatish Balay }
1261cee3aa6bSSatish Balay 
1262cee3aa6bSSatish Balay /*
1263cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1264cee3aa6bSSatish Balay    diagonal block
1265cee3aa6bSSatish Balay */
12665615d1e5SSatish Balay #undef __FUNC__
12675615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1268ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1269cee3aa6bSSatish Balay {
1270cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
12713a40ed3dSBarry Smith   int         ierr;
1272d64ed03dSBarry Smith 
1273d64ed03dSBarry Smith   PetscFunctionBegin;
1274a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
12753a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
12763a40ed3dSBarry Smith   PetscFunctionReturn(0);
1277cee3aa6bSSatish Balay }
1278cee3aa6bSSatish Balay 
12795615d1e5SSatish Balay #undef __FUNC__
12805615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1281ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1282cee3aa6bSSatish Balay {
1283cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1284cee3aa6bSSatish Balay   int         ierr;
1285d64ed03dSBarry Smith 
1286d64ed03dSBarry Smith   PetscFunctionBegin;
1287cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1288cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
12893a40ed3dSBarry Smith   PetscFunctionReturn(0);
1290cee3aa6bSSatish Balay }
1291026e39d0SSatish Balay 
12925615d1e5SSatish Balay #undef __FUNC__
12935615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1294ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
129557b952d6SSatish Balay {
129657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1297d64ed03dSBarry Smith 
1298d64ed03dSBarry Smith   PetscFunctionBegin;
1299bd7f49f5SSatish Balay   if (m) *m = mat->M;
1300bd7f49f5SSatish Balay   if (n) *n = mat->N;
13013a40ed3dSBarry Smith   PetscFunctionReturn(0);
130257b952d6SSatish Balay }
130357b952d6SSatish Balay 
13045615d1e5SSatish Balay #undef __FUNC__
13055615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1306ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
130757b952d6SSatish Balay {
130857b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1309d64ed03dSBarry Smith 
1310d64ed03dSBarry Smith   PetscFunctionBegin;
1311f830108cSBarry Smith   *m = mat->m; *n = mat->n;
13123a40ed3dSBarry Smith   PetscFunctionReturn(0);
131357b952d6SSatish Balay }
131457b952d6SSatish Balay 
13155615d1e5SSatish Balay #undef __FUNC__
13165615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1317ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
131857b952d6SSatish Balay {
131957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1320d64ed03dSBarry Smith 
1321d64ed03dSBarry Smith   PetscFunctionBegin;
132257b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
13233a40ed3dSBarry Smith   PetscFunctionReturn(0);
132457b952d6SSatish Balay }
132557b952d6SSatish Balay 
1326acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1327acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1328acdf5bf4SSatish Balay 
13295615d1e5SSatish Balay #undef __FUNC__
13305615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1331acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1332acdf5bf4SSatish Balay {
1333acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1334acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1335acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1336d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1337d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1338acdf5bf4SSatish Balay 
1339d64ed03dSBarry Smith   PetscFunctionBegin;
1340a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1341acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1342acdf5bf4SSatish Balay 
1343acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1344acdf5bf4SSatish Balay     /*
1345acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1346acdf5bf4SSatish Balay     */
1347acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1348bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1349bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1350acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1351acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1352acdf5bf4SSatish Balay     }
1353acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1354acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1355acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1356acdf5bf4SSatish Balay   }
1357acdf5bf4SSatish Balay 
1358a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1359d9d09a02SSatish Balay   lrow = row - brstart;
1360acdf5bf4SSatish Balay 
1361acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1362acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1363acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1364f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1365f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1366acdf5bf4SSatish Balay   nztot = nzA + nzB;
1367acdf5bf4SSatish Balay 
1368acdf5bf4SSatish Balay   cmap  = mat->garray;
1369acdf5bf4SSatish Balay   if (v  || idx) {
1370acdf5bf4SSatish Balay     if (nztot) {
1371acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1372acdf5bf4SSatish Balay       int imark = -1;
1373acdf5bf4SSatish Balay       if (v) {
1374acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1375acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1376d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1377acdf5bf4SSatish Balay           else break;
1378acdf5bf4SSatish Balay         }
1379acdf5bf4SSatish Balay         imark = i;
1380acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1381acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1382acdf5bf4SSatish Balay       }
1383acdf5bf4SSatish Balay       if (idx) {
1384acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1385acdf5bf4SSatish Balay         if (imark > -1) {
1386acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1387bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1388acdf5bf4SSatish Balay           }
1389acdf5bf4SSatish Balay         } else {
1390acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1391d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1392d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1393acdf5bf4SSatish Balay             else break;
1394acdf5bf4SSatish Balay           }
1395acdf5bf4SSatish Balay           imark = i;
1396acdf5bf4SSatish Balay         }
1397d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1398d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1399acdf5bf4SSatish Balay       }
1400d64ed03dSBarry Smith     } else {
1401d212a18eSSatish Balay       if (idx) *idx = 0;
1402d212a18eSSatish Balay       if (v)   *v   = 0;
1403d212a18eSSatish Balay     }
1404acdf5bf4SSatish Balay   }
1405acdf5bf4SSatish Balay   *nz = nztot;
1406f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1407f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
14083a40ed3dSBarry Smith   PetscFunctionReturn(0);
1409acdf5bf4SSatish Balay }
1410acdf5bf4SSatish Balay 
14115615d1e5SSatish Balay #undef __FUNC__
14125615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1413acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1414acdf5bf4SSatish Balay {
1415acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1416d64ed03dSBarry Smith 
1417d64ed03dSBarry Smith   PetscFunctionBegin;
1418acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1419a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1420acdf5bf4SSatish Balay   }
1421acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14223a40ed3dSBarry Smith   PetscFunctionReturn(0);
1423acdf5bf4SSatish Balay }
1424acdf5bf4SSatish Balay 
14255615d1e5SSatish Balay #undef __FUNC__
14265615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1427ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14285a838052SSatish Balay {
14295a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1430d64ed03dSBarry Smith 
1431d64ed03dSBarry Smith   PetscFunctionBegin;
14325a838052SSatish Balay   *bs = baij->bs;
14333a40ed3dSBarry Smith   PetscFunctionReturn(0);
14345a838052SSatish Balay }
14355a838052SSatish Balay 
14365615d1e5SSatish Balay #undef __FUNC__
14375615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1438ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
143958667388SSatish Balay {
144058667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
144158667388SSatish Balay   int         ierr;
1442d64ed03dSBarry Smith 
1443d64ed03dSBarry Smith   PetscFunctionBegin;
144458667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
144558667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
14463a40ed3dSBarry Smith   PetscFunctionReturn(0);
144758667388SSatish Balay }
14480ac07820SSatish Balay 
14495615d1e5SSatish Balay #undef __FUNC__
14505615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1451ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14520ac07820SSatish Balay {
14534e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
14544e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
14557d57db60SLois Curfman McInnes   int         ierr;
14567d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
14570ac07820SSatish Balay 
1458d64ed03dSBarry Smith   PetscFunctionBegin;
14594e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
14604e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
14610e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1462de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14634e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
14640e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1465de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14660ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14674e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14684e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14694e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14704e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14714e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14720ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1473f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
14744e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14754e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14764e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14774e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14784e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14790ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1480f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
14814e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14824e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14834e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14844e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14854e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14860ac07820SSatish Balay   }
14875a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
14885a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
14895a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
14905a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
14914e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14924e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14934e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14943a40ed3dSBarry Smith   PetscFunctionReturn(0);
14950ac07820SSatish Balay }
14960ac07820SSatish Balay 
14975615d1e5SSatish Balay #undef __FUNC__
14985615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1499ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
150058667388SSatish Balay {
150158667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
150258667388SSatish Balay 
1503d64ed03dSBarry Smith   PetscFunctionBegin;
150458667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
150558667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
15066da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1507c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
150896854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1509c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1510b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1511b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1512b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1513aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
151458667388SSatish Balay         MatSetOption(a->A,op);
151558667388SSatish Balay         MatSetOption(a->B,op);
1516b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
15176da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
151858667388SSatish Balay              op == MAT_SYMMETRIC ||
151958667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1520b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
1521b51ba29fSSatish Balay              op == MAT_USE_HASH_TABLE)
152258667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
152358667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
152458667388SSatish Balay     a->roworiented = 0;
152558667388SSatish Balay     MatSetOption(a->A,op);
152658667388SSatish Balay     MatSetOption(a->B,op);
15272b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
152890f02eecSBarry Smith     a->donotstash = 1;
1529d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1530d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1531133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1532133cdb44SSatish Balay     a->ht_flag = 1;
1533d64ed03dSBarry Smith   } else {
1534d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1535d64ed03dSBarry Smith   }
15363a40ed3dSBarry Smith   PetscFunctionReturn(0);
153758667388SSatish Balay }
153858667388SSatish Balay 
15395615d1e5SSatish Balay #undef __FUNC__
15405615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1541ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15420ac07820SSatish Balay {
15430ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
15440ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15450ac07820SSatish Balay   Mat        B;
154640011551SBarry Smith   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
15470ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
15480ac07820SSatish Balay   Scalar     *a;
15490ac07820SSatish Balay 
1550d64ed03dSBarry Smith   PetscFunctionBegin;
1551a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
15520ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
15530ac07820SSatish Balay   CHKERRQ(ierr);
15540ac07820SSatish Balay 
15550ac07820SSatish Balay   /* copy over the A part */
15560ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
15570ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15580ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
15590ac07820SSatish Balay 
15600ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
15610ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15620ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
15630ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
15640ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
15650ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
15660ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15670ac07820SSatish Balay         col++; a += bs;
15680ac07820SSatish Balay       }
15690ac07820SSatish Balay     }
15700ac07820SSatish Balay   }
15710ac07820SSatish Balay   /* copy over the B part */
15720ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
15730ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15740ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
15750ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15760ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
15770ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
15780ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15790ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
15800ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15810ac07820SSatish Balay         col++; a += bs;
15820ac07820SSatish Balay       }
15830ac07820SSatish Balay     }
15840ac07820SSatish Balay   }
15850ac07820SSatish Balay   PetscFree(rvals);
15860ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15870ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15880ac07820SSatish Balay 
15890ac07820SSatish Balay   if (matout != PETSC_NULL) {
15900ac07820SSatish Balay     *matout = B;
15910ac07820SSatish Balay   } else {
1592f830108cSBarry Smith     PetscOps *Abops;
1593cc2dc46cSBarry Smith     MatOps   Aops;
1594f830108cSBarry Smith 
15950ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
15960ac07820SSatish Balay     PetscFree(baij->rowners);
15970ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
15980ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1599*b1fc9764SSatish Balay #if defined (USE_CTABLE)
1600*b1fc9764SSatish Balay     if (baij->colmap) TableDelete(baij->colmap);
1601*b1fc9764SSatish Balay #else
16020ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
1603*b1fc9764SSatish Balay #endif
16040ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
16050ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
16060ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
16070ac07820SSatish Balay     PetscFree(baij);
1608f830108cSBarry Smith 
1609f830108cSBarry Smith     /*
1610f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1611f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1612f830108cSBarry Smith       else from C.
1613f830108cSBarry Smith     */
1614f830108cSBarry Smith     Abops = A->bops;
1615f830108cSBarry Smith     Aops  = A->ops;
1616f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1617f830108cSBarry Smith     A->bops = Abops;
1618f830108cSBarry Smith     A->ops  = Aops;
1619f830108cSBarry Smith 
16200ac07820SSatish Balay     PetscHeaderDestroy(B);
16210ac07820SSatish Balay   }
16223a40ed3dSBarry Smith   PetscFunctionReturn(0);
16230ac07820SSatish Balay }
16240e95ebc0SSatish Balay 
16255615d1e5SSatish Balay #undef __FUNC__
16265615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
16270e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
16280e95ebc0SSatish Balay {
16290e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
16300e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
16310e95ebc0SSatish Balay   int ierr,s1,s2,s3;
16320e95ebc0SSatish Balay 
1633d64ed03dSBarry Smith   PetscFunctionBegin;
16340e95ebc0SSatish Balay   if (ll)  {
16350e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
16360e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1637a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
16380e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
16390e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
16400e95ebc0SSatish Balay   }
1641a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
16423a40ed3dSBarry Smith   PetscFunctionReturn(0);
16430e95ebc0SSatish Balay }
16440e95ebc0SSatish Balay 
16455615d1e5SSatish Balay #undef __FUNC__
16465615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1647ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
16480ac07820SSatish Balay {
16490ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
16500ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1651a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
16520ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16530ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1654a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16550ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16560ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16570ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16580ac07820SSatish Balay   IS             istmp;
165972dacd9aSBarry Smith   PetscTruth     localdiag;
16600ac07820SSatish Balay 
1661d64ed03dSBarry Smith   PetscFunctionBegin;
16620ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
16630ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
16640ac07820SSatish Balay 
16650ac07820SSatish Balay   /*  first count number of contributors to each processor */
16660ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
16670ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
16680ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
16690ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16700ac07820SSatish Balay     idx = rows[i];
16710ac07820SSatish Balay     found = 0;
16720ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
16730ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
16740ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
16750ac07820SSatish Balay       }
16760ac07820SSatish Balay     }
1677a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
16780ac07820SSatish Balay   }
16790ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
16800ac07820SSatish Balay 
16810ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
16820ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1683ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
16840ac07820SSatish Balay   nrecvs = work[rank];
1685ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
16860ac07820SSatish Balay   nmax   = work[rank];
16870ac07820SSatish Balay   PetscFree(work);
16880ac07820SSatish Balay 
16890ac07820SSatish Balay   /* post receives:   */
1690d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1691d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
16920ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1693ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16940ac07820SSatish Balay   }
16950ac07820SSatish Balay 
16960ac07820SSatish Balay   /* do sends:
16970ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
16980ac07820SSatish Balay      the ith processor
16990ac07820SSatish Balay   */
17000ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1701ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
17020ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
17030ac07820SSatish Balay   starts[0] = 0;
17040ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
17050ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
17060ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17070ac07820SSatish Balay   }
17080ac07820SSatish Balay   ISRestoreIndices(is,&rows);
17090ac07820SSatish Balay 
17100ac07820SSatish Balay   starts[0] = 0;
17110ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
17120ac07820SSatish Balay   count = 0;
17130ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
17140ac07820SSatish Balay     if (procs[i]) {
1715ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17160ac07820SSatish Balay     }
17170ac07820SSatish Balay   }
17180ac07820SSatish Balay   PetscFree(starts);
17190ac07820SSatish Balay 
17200ac07820SSatish Balay   base = owners[rank]*bs;
17210ac07820SSatish Balay 
17220ac07820SSatish Balay   /*  wait on receives */
17230ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
17240ac07820SSatish Balay   source = lens + nrecvs;
17250ac07820SSatish Balay   count  = nrecvs; slen = 0;
17260ac07820SSatish Balay   while (count) {
1727ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17280ac07820SSatish Balay     /* unpack receives into our local space */
1729ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17300ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17310ac07820SSatish Balay     lens[imdex]  = n;
17320ac07820SSatish Balay     slen += n;
17330ac07820SSatish Balay     count--;
17340ac07820SSatish Balay   }
17350ac07820SSatish Balay   PetscFree(recv_waits);
17360ac07820SSatish Balay 
17370ac07820SSatish Balay   /* move the data into the send scatter */
17380ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
17390ac07820SSatish Balay   count = 0;
17400ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
17410ac07820SSatish Balay     values = rvalues + i*nmax;
17420ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
17430ac07820SSatish Balay       lrows[count++] = values[j] - base;
17440ac07820SSatish Balay     }
17450ac07820SSatish Balay   }
17460ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
17470ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
17480ac07820SSatish Balay 
17490ac07820SSatish Balay   /* actually zap the local rows */
1750029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
17510ac07820SSatish Balay   PLogObjectParent(A,istmp);
1752a07cd24cSSatish Balay 
175372dacd9aSBarry Smith   /*
175472dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
175572dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
175672dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
175772dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
175872dacd9aSBarry Smith 
175972dacd9aSBarry Smith        Contributed by: Mathew Knepley
176072dacd9aSBarry Smith   */
176172dacd9aSBarry Smith   localdiag = PETSC_FALSE;
176272dacd9aSBarry Smith   if (diag && (l->A->M == l->A->N)) {
176372dacd9aSBarry Smith     localdiag = PETSC_TRUE;
176472dacd9aSBarry Smith     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
176572dacd9aSBarry Smith   } else {
1766a07cd24cSSatish Balay     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
176772dacd9aSBarry Smith   }
17680ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
17690ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
17700ac07820SSatish Balay 
177172dacd9aSBarry Smith   if (diag && (localdiag == PETSC_FALSE)) {
1772a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1773a07cd24cSSatish Balay       row = lrows[i] + rstart_bs;
1774a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1775a07cd24cSSatish Balay     }
1776a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1777a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1778a07cd24cSSatish Balay   }
1779a07cd24cSSatish Balay   PetscFree(lrows);
1780a07cd24cSSatish Balay 
17810ac07820SSatish Balay   /* wait on sends */
17820ac07820SSatish Balay   if (nsends) {
1783d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1784ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
17850ac07820SSatish Balay     PetscFree(send_status);
17860ac07820SSatish Balay   }
17870ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
17880ac07820SSatish Balay 
17893a40ed3dSBarry Smith   PetscFunctionReturn(0);
17900ac07820SSatish Balay }
179172dacd9aSBarry Smith 
1792ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
17935615d1e5SSatish Balay #undef __FUNC__
17945615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1795ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1796ba4ca20aSSatish Balay {
1797ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
179825fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1799133cdb44SSatish Balay   static int  called = 0;
18003a40ed3dSBarry Smith   int         ierr;
1801ba4ca20aSSatish Balay 
1802d64ed03dSBarry Smith   PetscFunctionBegin;
18033a40ed3dSBarry Smith   if (!a->rank) {
18043a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
180525fdafccSSatish Balay   }
180625fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1807133cdb44SSatish Balay   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1808133cdb44SSatish Balay   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
18093a40ed3dSBarry Smith   PetscFunctionReturn(0);
1810ba4ca20aSSatish Balay }
18110ac07820SSatish Balay 
18125615d1e5SSatish Balay #undef __FUNC__
18135615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1814ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1815bb5a7306SBarry Smith {
1816bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1817bb5a7306SBarry Smith   int         ierr;
1818d64ed03dSBarry Smith 
1819d64ed03dSBarry Smith   PetscFunctionBegin;
1820bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
18213a40ed3dSBarry Smith   PetscFunctionReturn(0);
1822bb5a7306SBarry Smith }
1823bb5a7306SBarry Smith 
18242e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18250ac07820SSatish Balay 
182679bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1827cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1828cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1829cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1830cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1831cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1832cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
1833cc2dc46cSBarry Smith   MatMultTrans_MPIBAIJ,
1834cc2dc46cSBarry Smith   MatMultTransAdd_MPIBAIJ,
1835cc2dc46cSBarry Smith   0,
1836cc2dc46cSBarry Smith   0,
1837cc2dc46cSBarry Smith   0,
1838cc2dc46cSBarry Smith   0,
1839cc2dc46cSBarry Smith   0,
1840cc2dc46cSBarry Smith   0,
1841cc2dc46cSBarry Smith   0,
1842cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1843cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
1844cc2dc46cSBarry Smith   0,
1845cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1846cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1847cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1848cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1849cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1850cc2dc46cSBarry Smith   0,
1851cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1852cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1853cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1854cc2dc46cSBarry Smith   0,
1855cc2dc46cSBarry Smith   0,
1856cc2dc46cSBarry Smith   0,
1857cc2dc46cSBarry Smith   0,
1858cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1859cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1860cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1861cc2dc46cSBarry Smith   0,
1862cc2dc46cSBarry Smith   0,
1863cc2dc46cSBarry Smith   0,
1864cc2dc46cSBarry Smith   0,
18652e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1866cc2dc46cSBarry Smith   0,
1867cc2dc46cSBarry Smith   0,
1868cc2dc46cSBarry Smith   0,
1869cc2dc46cSBarry Smith   0,
1870cc2dc46cSBarry Smith   0,
1871cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1872cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1873cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1874cc2dc46cSBarry Smith   0,
1875cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1876cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1877cc2dc46cSBarry Smith   0,
1878cc2dc46cSBarry Smith   0,
1879cc2dc46cSBarry Smith   0,
1880cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1881cc2dc46cSBarry Smith   0,
1882cc2dc46cSBarry Smith   0,
1883cc2dc46cSBarry Smith   0,
1884cc2dc46cSBarry Smith   0,
1885cc2dc46cSBarry Smith   0,
1886cc2dc46cSBarry Smith   0,
1887cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1888cc2dc46cSBarry Smith   0,
1889cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1890cc2dc46cSBarry Smith   0,
1891cc2dc46cSBarry Smith   0,
1892cc2dc46cSBarry Smith   0,
1893cc2dc46cSBarry Smith   MatGetMaps_Petsc};
189479bdfe76SSatish Balay 
1895e18c124aSSatish Balay #include "pc.h"
1896e18c124aSSatish Balay EXTERN_C_BEGIN
1897e18c124aSSatish Balay extern int PCSetUp_BJacobi_BAIJ(PC);
1898e18c124aSSatish Balay EXTERN_C_END
189979bdfe76SSatish Balay 
19005615d1e5SSatish Balay #undef __FUNC__
19015615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
190279bdfe76SSatish Balay /*@C
190379bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
190479bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
190579bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
190679bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
190779bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
190879bdfe76SSatish Balay 
1909db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1910db81eaa0SLois Curfman McInnes 
191179bdfe76SSatish Balay    Input Parameters:
1912db81eaa0SLois Curfman McInnes +  comm - MPI communicator
191379bdfe76SSatish Balay .  bs   - size of blockk
191479bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
191592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
191692e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
191792e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
191892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
191992e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1920be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1921be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
192279bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
192379bdfe76SSatish Balay            submatrix  (same for all local rows)
192492e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
192592e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
1926db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
192792e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
192879bdfe76SSatish Balay            submatrix (same for all local rows).
1929db81eaa0SLois Curfman McInnes -  o_nzz - array containing the number of nonzeros in the various block rows of the
193092e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
193192e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
193279bdfe76SSatish Balay 
193379bdfe76SSatish Balay    Output Parameter:
193479bdfe76SSatish Balay .  A - the matrix
193579bdfe76SSatish Balay 
1936db81eaa0SLois Curfman McInnes    Options Database Keys:
1937db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
1938db81eaa0SLois Curfman McInnes                      block calculations (much slower)
1939db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
1940494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1941494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
19423ffaccefSLois Curfman McInnes 
1943b259b22eSLois Curfman McInnes    Notes:
194479bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
194579bdfe76SSatish Balay    (possibly both).
194679bdfe76SSatish Balay 
1947be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1948be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
1949be79a94dSBarry Smith 
195079bdfe76SSatish Balay    Storage Information:
195179bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
195279bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
195379bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
195479bdfe76SSatish Balay    local matrix (a rectangular submatrix).
195579bdfe76SSatish Balay 
195679bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
195779bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
195879bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
195979bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
196079bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
196179bdfe76SSatish Balay 
196279bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
196379bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
196479bdfe76SSatish Balay 
1965db81eaa0SLois Curfman McInnes .vb
1966db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
1967db81eaa0SLois Curfman McInnes           -------------------
1968db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
1969db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
1970db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
1971db81eaa0SLois Curfman McInnes           -------------------
1972db81eaa0SLois Curfman McInnes .ve
197379bdfe76SSatish Balay 
197479bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
197579bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
197679bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
197757b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
197879bdfe76SSatish Balay 
1979d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1980d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
198179bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
198292e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
198392e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
19846da5968aSLois Curfman McInnes    matrices.
198579bdfe76SSatish Balay 
198692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
198779bdfe76SSatish Balay 
1988db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
198979bdfe76SSatish Balay @*/
199079bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
199179bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
199279bdfe76SSatish Balay {
199379bdfe76SSatish Balay   Mat          B;
199479bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1995133cdb44SSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1996a2ab621fSBarry Smith   int          flag1 = 0,flag2 = 0;
199779bdfe76SSatish Balay 
1998d64ed03dSBarry Smith   PetscFunctionBegin;
1999a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
20003914022bSBarry Smith 
20013914022bSBarry Smith   MPI_Comm_size(comm,&size);
2002494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
2003494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
2004494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
20053914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
20063914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
20073914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
20083a40ed3dSBarry Smith     PetscFunctionReturn(0);
20093914022bSBarry Smith   }
20103914022bSBarry Smith 
201179bdfe76SSatish Balay   *A = 0;
20123f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
201379bdfe76SSatish Balay   PLogObjectCreate(B);
201479bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
201579bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
2016cc2dc46cSBarry Smith   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
20174c50302cSBarry Smith 
2018e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
2019e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
202090f02eecSBarry Smith   B->mapping    = 0;
202179bdfe76SSatish Balay   B->factor     = 0;
202279bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
202379bdfe76SSatish Balay 
2024e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
202579bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
202679bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
202779bdfe76SSatish Balay 
2028d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
2029a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2030d64ed03dSBarry Smith   }
2031a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
2032a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2033a8c6a408SBarry Smith   }
2034a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2035a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2036a8c6a408SBarry Smith   }
2037cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2038cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
203979bdfe76SSatish Balay 
204079bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
204179bdfe76SSatish Balay     work[0] = m; work[1] = n;
204279bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2043ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
204479bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
204579bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
204679bdfe76SSatish Balay   }
204779bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
204879bdfe76SSatish Balay     Mbs = M/bs;
2049a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
205079bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
205179bdfe76SSatish Balay     m   = mbs*bs;
205279bdfe76SSatish Balay   }
205379bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
205479bdfe76SSatish Balay     Nbs = N/bs;
2055a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
205679bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
205779bdfe76SSatish Balay     n   = nbs*bs;
205879bdfe76SSatish Balay   }
2059a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2060a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2061a8c6a408SBarry Smith   }
206279bdfe76SSatish Balay 
206379bdfe76SSatish Balay   b->m = m; B->m = m;
206479bdfe76SSatish Balay   b->n = n; B->n = n;
206579bdfe76SSatish Balay   b->N = N; B->N = N;
206679bdfe76SSatish Balay   b->M = M; B->M = M;
206779bdfe76SSatish Balay   b->bs  = bs;
206879bdfe76SSatish Balay   b->bs2 = bs*bs;
206979bdfe76SSatish Balay   b->mbs = mbs;
207079bdfe76SSatish Balay   b->nbs = nbs;
207179bdfe76SSatish Balay   b->Mbs = Mbs;
207279bdfe76SSatish Balay   b->Nbs = Nbs;
207379bdfe76SSatish Balay 
2074c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2075c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2076488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2077488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2078c7fcc2eaSBarry Smith 
207979bdfe76SSatish Balay   /* build local table of row and column ownerships */
208079bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
2081f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
20820ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
2083ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
208479bdfe76SSatish Balay   b->rowners[0] = 0;
208579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
208679bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
208779bdfe76SSatish Balay   }
208879bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
208979bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
20904fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
20914fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
20924fa0d573SSatish Balay 
2093ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
209479bdfe76SSatish Balay   b->cowners[0] = 0;
209579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
209679bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
209779bdfe76SSatish Balay   }
209879bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
209979bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
21004fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
21014fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
210279bdfe76SSatish Balay 
2103a07cd24cSSatish Balay 
210479bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2105029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
210679bdfe76SSatish Balay   PLogObjectParent(B,b->A);
210779bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2108029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
210979bdfe76SSatish Balay   PLogObjectParent(B,b->B);
211079bdfe76SSatish Balay 
211179bdfe76SSatish Balay   /* build cache for off array entries formed */
211279bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
211390f02eecSBarry Smith   b->donotstash  = 0;
211479bdfe76SSatish Balay   b->colmap      = 0;
211579bdfe76SSatish Balay   b->garray      = 0;
211679bdfe76SSatish Balay   b->roworiented = 1;
211779bdfe76SSatish Balay 
211830793edcSSatish Balay   /* stuff used in block assembly */
211930793edcSSatish Balay   b->barray       = 0;
212030793edcSSatish Balay 
212179bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
212279bdfe76SSatish Balay   b->lvec         = 0;
212379bdfe76SSatish Balay   b->Mvctx        = 0;
212479bdfe76SSatish Balay 
212579bdfe76SSatish Balay   /* stuff for MatGetRow() */
212679bdfe76SSatish Balay   b->rowindices   = 0;
212779bdfe76SSatish Balay   b->rowvalues    = 0;
212879bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
212979bdfe76SSatish Balay 
2130a07cd24cSSatish Balay   /* hash table stuff */
2131a07cd24cSSatish Balay   b->ht           = 0;
2132187ce0cbSSatish Balay   b->hd           = 0;
21330bdbc534SSatish Balay   b->ht_size      = 0;
2134133cdb44SSatish Balay   b->ht_flag      = 0;
213525fdafccSSatish Balay   b->ht_fact      = 0;
2136187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2137187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2138a07cd24cSSatish Balay 
213979bdfe76SSatish Balay   *A = B;
2140133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2141133cdb44SSatish Balay   if (flg) {
2142133cdb44SSatish Balay     double fact = 1.39;
2143133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2144133cdb44SSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2145133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2146133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2147133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2148133cdb44SSatish Balay   }
2149e18c124aSSatish Balay   ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C",
2150e18c124aSSatish Balay                                      "PCSetUp_BJacobi_BAIJ",
2151e18c124aSSatish Balay                                      (void*)PCSetUp_BJacobi_BAIJ);CHKERRQ(ierr);
21523a40ed3dSBarry Smith   PetscFunctionReturn(0);
215379bdfe76SSatish Balay }
2154026e39d0SSatish Balay 
21555615d1e5SSatish Balay #undef __FUNC__
21562e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ"
21572e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
21580ac07820SSatish Balay {
21590ac07820SSatish Balay   Mat         mat;
21600ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
21610ac07820SSatish Balay   int         ierr, len=0, flg;
21620ac07820SSatish Balay 
2163d64ed03dSBarry Smith   PetscFunctionBegin;
21640ac07820SSatish Balay   *newmat       = 0;
21653f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
21660ac07820SSatish Balay   PLogObjectCreate(mat);
21670ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2168cc2dc46cSBarry Smith   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2169e1311b90SBarry Smith   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2170e1311b90SBarry Smith   mat->ops->view       = MatView_MPIBAIJ;
21710ac07820SSatish Balay   mat->factor          = matin->factor;
21720ac07820SSatish Balay   mat->assembled       = PETSC_TRUE;
21730ac07820SSatish Balay 
21740ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
21750ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
21760ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
21770ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
21780ac07820SSatish Balay 
21790ac07820SSatish Balay   a->bs  = oldmat->bs;
21800ac07820SSatish Balay   a->bs2 = oldmat->bs2;
21810ac07820SSatish Balay   a->mbs = oldmat->mbs;
21820ac07820SSatish Balay   a->nbs = oldmat->nbs;
21830ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
21840ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
21850ac07820SSatish Balay 
21860ac07820SSatish Balay   a->rstart       = oldmat->rstart;
21870ac07820SSatish Balay   a->rend         = oldmat->rend;
21880ac07820SSatish Balay   a->cstart       = oldmat->cstart;
21890ac07820SSatish Balay   a->cend         = oldmat->cend;
21900ac07820SSatish Balay   a->size         = oldmat->size;
21910ac07820SSatish Balay   a->rank         = oldmat->rank;
2192e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
21930ac07820SSatish Balay   a->rowvalues    = 0;
21940ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
219530793edcSSatish Balay   a->barray       = 0;
21960ac07820SSatish Balay 
2197133cdb44SSatish Balay   /* hash table stuff */
2198133cdb44SSatish Balay   a->ht           = 0;
2199133cdb44SSatish Balay   a->hd           = 0;
2200133cdb44SSatish Balay   a->ht_size      = 0;
2201133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
220225fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2203133cdb44SSatish Balay   a->ht_total_ct  = 0;
2204133cdb44SSatish Balay   a->ht_insert_ct = 0;
2205133cdb44SSatish Balay 
2206133cdb44SSatish Balay 
22070ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2208f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
22090ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
22100ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
22110ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
22120ac07820SSatish Balay   if (oldmat->colmap) {
221348e59246SSatish Balay #if defined (USE_CTABLE)
221448e59246SSatish Balay   ierr = TableCreateCopy( &a->colmap,oldmat->colmap ); CHKERRQ(ierr);
221548e59246SSatish Balay #else
22160ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
22170ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
22180ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
221948e59246SSatish Balay #endif
22200ac07820SSatish Balay   } else a->colmap = 0;
22210ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
22220ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
22230ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
22240ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
22250ac07820SSatish Balay   } else a->garray = 0;
22260ac07820SSatish Balay 
22270ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
22280ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
22290ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2230e18c124aSSatish Balay 
22310ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
22322e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
22330ac07820SSatish Balay   PLogObjectParent(mat,a->A);
22342e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
22350ac07820SSatish Balay   PLogObjectParent(mat,a->B);
22360ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2237e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
22380ac07820SSatish Balay   if (flg) {
22390ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
22400ac07820SSatish Balay   }
22410ac07820SSatish Balay   *newmat = mat;
22423a40ed3dSBarry Smith   PetscFunctionReturn(0);
22430ac07820SSatish Balay }
224457b952d6SSatish Balay 
224557b952d6SSatish Balay #include "sys.h"
224657b952d6SSatish Balay 
22475615d1e5SSatish Balay #undef __FUNC__
22485615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
224957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
225057b952d6SSatish Balay {
225157b952d6SSatish Balay   Mat          A;
225257b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
225357b952d6SSatish Balay   Scalar       *vals,*buf;
225457b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
225557b952d6SSatish Balay   MPI_Status   status;
2256cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
225757b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
225840011551SBarry Smith   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
225957b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
226057b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
226157b952d6SSatish Balay 
2262d64ed03dSBarry Smith   PetscFunctionBegin;
226357b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
226457b952d6SSatish Balay 
226557b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
226657b952d6SSatish Balay   if (!rank) {
226757b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2268e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2269a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2270d64ed03dSBarry Smith     if (header[3] < 0) {
2271a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2272d64ed03dSBarry Smith     }
22736c5fab8fSBarry Smith   }
2274d64ed03dSBarry Smith 
2275ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
227657b952d6SSatish Balay   M = header[1]; N = header[2];
227757b952d6SSatish Balay 
2278a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
227957b952d6SSatish Balay 
228057b952d6SSatish Balay   /*
228157b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
228257b952d6SSatish Balay      divisible by the blocksize
228357b952d6SSatish Balay   */
228457b952d6SSatish Balay   Mbs        = M/bs;
228557b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
228657b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
228757b952d6SSatish Balay   else                  Mbs++;
228857b952d6SSatish Balay   if (extra_rows &&!rank) {
2289b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
229057b952d6SSatish Balay   }
2291537820f0SBarry Smith 
229257b952d6SSatish Balay   /* determine ownership of all rows */
229357b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
229457b952d6SSatish Balay   m   = mbs * bs;
2295cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2296cee3aa6bSSatish Balay   browners = rowners + size + 1;
2297ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
229857b952d6SSatish Balay   rowners[0] = 0;
2299cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2300cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
230157b952d6SSatish Balay   rstart = rowners[rank];
230257b952d6SSatish Balay   rend   = rowners[rank+1];
230357b952d6SSatish Balay 
230457b952d6SSatish Balay   /* distribute row lengths to all processors */
230557b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
230657b952d6SSatish Balay   if (!rank) {
230757b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2308e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
230957b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
231057b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2311cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2312ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
231357b952d6SSatish Balay     PetscFree(sndcounts);
2314d64ed03dSBarry Smith   } else {
2315ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
231657b952d6SSatish Balay   }
231757b952d6SSatish Balay 
231857b952d6SSatish Balay   if (!rank) {
231957b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
232057b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
232157b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
232257b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
232357b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
232457b952d6SSatish Balay         procsnz[i] += rowlengths[j];
232557b952d6SSatish Balay       }
232657b952d6SSatish Balay     }
232757b952d6SSatish Balay     PetscFree(rowlengths);
232857b952d6SSatish Balay 
232957b952d6SSatish Balay     /* determine max buffer needed and allocate it */
233057b952d6SSatish Balay     maxnz = 0;
233157b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
233257b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
233357b952d6SSatish Balay     }
233457b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
233557b952d6SSatish Balay 
233657b952d6SSatish Balay     /* read in my part of the matrix column indices  */
233757b952d6SSatish Balay     nz = procsnz[0];
233857b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
233957b952d6SSatish Balay     mycols = ibuf;
2340cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2341e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2342cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2343cee3aa6bSSatish Balay 
234457b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
234557b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
234657b952d6SSatish Balay       nz   = procsnz[i];
2347e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2348ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
234957b952d6SSatish Balay     }
235057b952d6SSatish Balay     /* read in the stuff for the last proc */
235157b952d6SSatish Balay     if ( size != 1 ) {
235257b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2353e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
235457b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2355ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
235657b952d6SSatish Balay     }
235757b952d6SSatish Balay     PetscFree(cols);
2358d64ed03dSBarry Smith   } else {
235957b952d6SSatish Balay     /* determine buffer space needed for message */
236057b952d6SSatish Balay     nz = 0;
236157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
236257b952d6SSatish Balay       nz += locrowlens[i];
236357b952d6SSatish Balay     }
236457b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
236557b952d6SSatish Balay     mycols = ibuf;
236657b952d6SSatish Balay     /* receive message of column indices*/
2367ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2368ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2369a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
237057b952d6SSatish Balay   }
237157b952d6SSatish Balay 
237257b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2373cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2374cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
237557b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2376cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
237757b952d6SSatish Balay   masked1 = mask    + Mbs;
237857b952d6SSatish Balay   masked2 = masked1 + Mbs;
237957b952d6SSatish Balay   rowcount = 0; nzcount = 0;
238057b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
238157b952d6SSatish Balay     dcount  = 0;
238257b952d6SSatish Balay     odcount = 0;
238357b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
238457b952d6SSatish Balay       kmax = locrowlens[rowcount];
238557b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
238657b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
238757b952d6SSatish Balay         if (!mask[tmp]) {
238857b952d6SSatish Balay           mask[tmp] = 1;
238957b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
239057b952d6SSatish Balay           else masked1[dcount++] = tmp;
239157b952d6SSatish Balay         }
239257b952d6SSatish Balay       }
239357b952d6SSatish Balay       rowcount++;
239457b952d6SSatish Balay     }
2395cee3aa6bSSatish Balay 
239657b952d6SSatish Balay     dlens[i]  = dcount;
239757b952d6SSatish Balay     odlens[i] = odcount;
2398cee3aa6bSSatish Balay 
239957b952d6SSatish Balay     /* zero out the mask elements we set */
240057b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
240157b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
240257b952d6SSatish Balay   }
2403cee3aa6bSSatish Balay 
240457b952d6SSatish Balay   /* create our matrix */
2405537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2406537820f0SBarry Smith          CHKERRQ(ierr);
240757b952d6SSatish Balay   A = *newmat;
24086d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
240957b952d6SSatish Balay 
241057b952d6SSatish Balay   if (!rank) {
241157b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
241257b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
241357b952d6SSatish Balay     nz = procsnz[0];
241457b952d6SSatish Balay     vals = buf;
2415cee3aa6bSSatish Balay     mycols = ibuf;
2416cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2417e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2418cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2419537820f0SBarry Smith 
242057b952d6SSatish Balay     /* insert into matrix */
242157b952d6SSatish Balay     jj      = rstart*bs;
242257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
242357b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
242457b952d6SSatish Balay       mycols += locrowlens[i];
242557b952d6SSatish Balay       vals   += locrowlens[i];
242657b952d6SSatish Balay       jj++;
242757b952d6SSatish Balay     }
242857b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
242957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
243057b952d6SSatish Balay       nz   = procsnz[i];
243157b952d6SSatish Balay       vals = buf;
2432e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2433ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
243457b952d6SSatish Balay     }
243557b952d6SSatish Balay     /* the last proc */
243657b952d6SSatish Balay     if ( size != 1 ){
243757b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2438cee3aa6bSSatish Balay       vals = buf;
2439e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
244057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2441ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
244257b952d6SSatish Balay     }
244357b952d6SSatish Balay     PetscFree(procsnz);
2444d64ed03dSBarry Smith   } else {
244557b952d6SSatish Balay     /* receive numeric values */
244657b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
244757b952d6SSatish Balay 
244857b952d6SSatish Balay     /* receive message of values*/
244957b952d6SSatish Balay     vals   = buf;
2450cee3aa6bSSatish Balay     mycols = ibuf;
2451ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2452ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2453a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
245457b952d6SSatish Balay 
245557b952d6SSatish Balay     /* insert into matrix */
245657b952d6SSatish Balay     jj      = rstart*bs;
2457cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
245857b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
245957b952d6SSatish Balay       mycols += locrowlens[i];
246057b952d6SSatish Balay       vals   += locrowlens[i];
246157b952d6SSatish Balay       jj++;
246257b952d6SSatish Balay     }
246357b952d6SSatish Balay   }
246457b952d6SSatish Balay   PetscFree(locrowlens);
246557b952d6SSatish Balay   PetscFree(buf);
246657b952d6SSatish Balay   PetscFree(ibuf);
246757b952d6SSatish Balay   PetscFree(rowners);
246857b952d6SSatish Balay   PetscFree(dlens);
2469cee3aa6bSSatish Balay   PetscFree(mask);
24706d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
24716d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
24723a40ed3dSBarry Smith   PetscFunctionReturn(0);
247357b952d6SSatish Balay }
247457b952d6SSatish Balay 
247557b952d6SSatish Balay 
2476133cdb44SSatish Balay 
2477133cdb44SSatish Balay #undef __FUNC__
2478133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2479133cdb44SSatish Balay /*@
2480133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2481133cdb44SSatish Balay 
2482133cdb44SSatish Balay    Input Parameters:
2483133cdb44SSatish Balay .  mat  - the matrix
2484133cdb44SSatish Balay .  fact - factor
2485133cdb44SSatish Balay 
2486fee21e36SBarry Smith    Collective on Mat
2487fee21e36SBarry Smith 
2488133cdb44SSatish Balay   Notes:
2489133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2490133cdb44SSatish Balay 
2491133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2492133cdb44SSatish Balay 
2493133cdb44SSatish Balay .seealso: MatSetOption()
2494133cdb44SSatish Balay @*/
2495133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2496133cdb44SSatish Balay {
249725fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2498133cdb44SSatish Balay 
2499133cdb44SSatish Balay   PetscFunctionBegin;
2500133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
250125fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2502133cdb44SSatish Balay       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2503133cdb44SSatish Balay   }
2504133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*) mat->data;
2505133cdb44SSatish Balay   baij->ht_fact = fact;
2506133cdb44SSatish Balay   PetscFunctionReturn(0);
2507133cdb44SSatish Balay }
2508