xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4787f7683dc5ea85e35b4269f7afceca0dae908f)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*4787f768SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.147 1999/02/12 00:04:25 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
117b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
133b2fbd54SBarry Smith 
14537820f0SBarry Smith /*
15537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
1857b952d6SSatish Balay    length of colmap equals the global matrix length.
1957b952d6SSatish Balay */
205615d1e5SSatish Balay #undef __FUNC__
215615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2357b952d6SSatish Balay {
2457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
266dee3f7eSBarry Smith   int         nbs = B->nbs,i,bs=B->bs;
276dee3f7eSBarry Smith #if defined (USE_CTABLE)
286dee3f7eSBarry Smith   int         ierr;
296dee3f7eSBarry Smith #endif
3057b952d6SSatish Balay 
31d64ed03dSBarry Smith   PetscFunctionBegin;
3248e59246SSatish Balay #if defined (USE_CTABLE)
3348e59246SSatish Balay   ierr = TableCreate( &baij->colmap, baij->nbs/5 ); CHKERRQ(ierr);
3448e59246SSatish Balay   for ( i=0; i<nbs; i++ ){
3548e59246SSatish Balay     ierr = TableAdd( baij->colmap, baij->garray[i] + 1, i*bs+1 );CHKERRQ(ierr);
3648e59246SSatish Balay   }
3748e59246SSatish Balay #else
38758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3957b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4057b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
41928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4248e59246SSatish Balay #endif
433a40ed3dSBarry Smith   PetscFunctionReturn(0);
4457b952d6SSatish Balay }
4557b952d6SSatish Balay 
4680c1aa95SSatish Balay #define CHUNKSIZE  10
4780c1aa95SSatish Balay 
48f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
4980c1aa95SSatish Balay { \
5080c1aa95SSatish Balay  \
5180c1aa95SSatish Balay     brow = row/bs;  \
5280c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
53ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
5480c1aa95SSatish Balay       bcol = col/bs; \
5580c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
56ab26458aSBarry Smith       low = 0; high = nrow; \
57ab26458aSBarry Smith       while (high-low > 3) { \
58ab26458aSBarry Smith         t = (low+high)/2; \
59ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
60ab26458aSBarry Smith         else              low  = t; \
61ab26458aSBarry Smith       } \
62ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
6380c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
6480c1aa95SSatish Balay         if (rp[_i] == bcol) { \
6580c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
66eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
67eada6651SSatish Balay           else                    *bap  = value;  \
68ac7a638eSSatish Balay           goto a_noinsert; \
6980c1aa95SSatish Balay         } \
7080c1aa95SSatish Balay       } \
7189280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
72a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
7380c1aa95SSatish Balay       if (nrow >= rmax) { \
7480c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
7580c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
7680c1aa95SSatish Balay         Scalar *new_a; \
7780c1aa95SSatish Balay  \
78a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
7989280ab3SLois Curfman McInnes  \
8080c1aa95SSatish Balay         /* malloc new storage space */ \
8180c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
8280c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
8380c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
8480c1aa95SSatish Balay         new_i   = new_j + new_nz; \
8580c1aa95SSatish Balay  \
8680c1aa95SSatish Balay         /* copy over old data into new slots */ \
8780c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
8880c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
8980c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
9080c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
9180c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
9280c1aa95SSatish Balay                                                            len*sizeof(int)); \
9380c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
9480c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
9580c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
9680c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
9780c1aa95SSatish Balay         /* free up old matrix storage */ \
9880c1aa95SSatish Balay         PetscFree(a->a);  \
9980c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
10080c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
10180c1aa95SSatish Balay         a->singlemalloc = 1; \
10280c1aa95SSatish Balay  \
10380c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
104ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
10580c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
10680c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
10780c1aa95SSatish Balay         a->reallocs++; \
10880c1aa95SSatish Balay         a->nz++; \
10980c1aa95SSatish Balay       } \
11080c1aa95SSatish Balay       N = nrow++ - 1;  \
11180c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
11280c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
11380c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
11480c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
11580c1aa95SSatish Balay       } \
11680c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
11780c1aa95SSatish Balay       rp[_i]                      = bcol;  \
11880c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
119ac7a638eSSatish Balay       a_noinsert:; \
12080c1aa95SSatish Balay     ailen[brow] = nrow; \
12180c1aa95SSatish Balay }
12257b952d6SSatish Balay 
123ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
124ac7a638eSSatish Balay { \
125ac7a638eSSatish Balay  \
126ac7a638eSSatish Balay     brow = row/bs;  \
127ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
128ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
129ac7a638eSSatish Balay       bcol = col/bs; \
130ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
131ac7a638eSSatish Balay       low = 0; high = nrow; \
132ac7a638eSSatish Balay       while (high-low > 3) { \
133ac7a638eSSatish Balay         t = (low+high)/2; \
134ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
135ac7a638eSSatish Balay         else              low  = t; \
136ac7a638eSSatish Balay       } \
137ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
138ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
139ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
140ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
141ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
142ac7a638eSSatish Balay           else                    *bap  = value;  \
143ac7a638eSSatish Balay           goto b_noinsert; \
144ac7a638eSSatish Balay         } \
145ac7a638eSSatish Balay       } \
14689280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
147a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
148ac7a638eSSatish Balay       if (nrow >= rmax) { \
149ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15074c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
151ac7a638eSSatish Balay         Scalar *new_a; \
152ac7a638eSSatish Balay  \
153a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
15489280ab3SLois Curfman McInnes  \
155ac7a638eSSatish Balay         /* malloc new storage space */ \
15674c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
157ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
158ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
159ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
160ac7a638eSSatish Balay  \
161ac7a638eSSatish Balay         /* copy over old data into new slots */ \
162ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
16374c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
164ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
165ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
166ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
167ac7a638eSSatish Balay                                                            len*sizeof(int)); \
168ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
169ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
170ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
171ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
172ac7a638eSSatish Balay         /* free up old matrix storage */ \
17374c639caSSatish Balay         PetscFree(b->a);  \
17474c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
17574c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
17674c639caSSatish Balay         b->singlemalloc = 1; \
177ac7a638eSSatish Balay  \
178ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
179ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
18074c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
18174c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
18274c639caSSatish Balay         b->reallocs++; \
18374c639caSSatish Balay         b->nz++; \
184ac7a638eSSatish Balay       } \
185ac7a638eSSatish Balay       N = nrow++ - 1;  \
186ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
187ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
188ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
189ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
190ac7a638eSSatish Balay       } \
191ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
192ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
193ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
194ac7a638eSSatish Balay       b_noinsert:; \
195ac7a638eSSatish Balay     bilen[brow] = nrow; \
196ac7a638eSSatish Balay }
197ac7a638eSSatish Balay 
1985615d1e5SSatish Balay #undef __FUNC__
1995615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
200ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
20157b952d6SSatish Balay {
20257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
20357b952d6SSatish Balay   Scalar      value;
2044fa0d573SSatish Balay   int         ierr,i,j,row,col;
2054fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2064fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2074fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
20857b952d6SSatish Balay 
209eada6651SSatish Balay   /* Some Variables required in the macro */
21080c1aa95SSatish Balay   Mat         A = baij->A;
21180c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
212ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
213ac7a638eSSatish Balay   Scalar      *aa=a->a;
214ac7a638eSSatish Balay 
215ac7a638eSSatish Balay   Mat         B = baij->B;
216ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
217ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
218ac7a638eSSatish Balay   Scalar      *ba=b->a;
219ac7a638eSSatish Balay 
220ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
221ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
222ac7a638eSSatish Balay   Scalar      *ap,*bap;
22380c1aa95SSatish Balay 
224d64ed03dSBarry Smith   PetscFunctionBegin;
22557b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2265ef9f2a5SBarry Smith     if (im[i] < 0) continue;
2273a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
228a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
229639f9d9dSBarry Smith #endif
23057b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
23157b952d6SSatish Balay       row = im[i] - rstart_orig;
23257b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
23357b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
23457b952d6SSatish Balay           col = in[j] - cstart_orig;
23557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
236f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
23780c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
23857b952d6SSatish Balay         }
2395ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
2403a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
241a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
242639f9d9dSBarry Smith #endif
24357b952d6SSatish Balay         else {
24457b952d6SSatish Balay           if (mat->was_assembled) {
245905e6a2fSBarry Smith             if (!baij->colmap) {
246905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
247905e6a2fSBarry Smith             }
24848e59246SSatish Balay #if defined (USE_CTABLE)
24948e59246SSatish Balay 	    col = TableFind( baij->colmap, in[j]/bs + 1 ) - 1 + in[j]%bs;
25048e59246SSatish Balay #else
251905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
25248e59246SSatish Balay #endif
25357b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
25457b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
25557b952d6SSatish Balay               col =  in[j];
2569bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2579bf004c3SSatish Balay               B = baij->B;
2589bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2599bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2609bf004c3SSatish Balay               ba=b->a;
26157b952d6SSatish Balay             }
262d64ed03dSBarry Smith           } else col = in[j];
26357b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
264ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
265ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
26657b952d6SSatish Balay         }
26757b952d6SSatish Balay       }
268d64ed03dSBarry Smith     } else {
26990f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
27057b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
271d64ed03dSBarry Smith       } else {
27290f02eecSBarry Smith         if (!baij->donotstash) {
27357b952d6SSatish Balay           row = im[i];
27457b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
27557b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
27657b952d6SSatish Balay           }
27757b952d6SSatish Balay         }
27857b952d6SSatish Balay       }
27957b952d6SSatish Balay     }
28090f02eecSBarry Smith   }
2813a40ed3dSBarry Smith   PetscFunctionReturn(0);
28257b952d6SSatish Balay }
28357b952d6SSatish Balay 
284ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
285ab26458aSBarry Smith #undef __FUNC__
286ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
287ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
288ab26458aSBarry Smith {
289ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
29030793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
291abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
292ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
293ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
294ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
295ab26458aSBarry Smith 
29630793edcSSatish Balay   if(!barray) {
29747513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
29830793edcSSatish Balay   }
29930793edcSSatish Balay 
300ab26458aSBarry Smith   if (roworiented) {
301ab26458aSBarry Smith     stepval = (n-1)*bs;
302ab26458aSBarry Smith   } else {
303ab26458aSBarry Smith     stepval = (m-1)*bs;
304ab26458aSBarry Smith   }
305ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
3065ef9f2a5SBarry Smith     if (im[i] < 0) continue;
3073a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
3085ef9f2a5SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
309ab26458aSBarry Smith #endif
310ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
311ab26458aSBarry Smith       row = im[i] - rstart;
312ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
31315b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
31415b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
31515b57d14SSatish Balay           barray = v + i*bs2;
31615b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
31715b57d14SSatish Balay           barray = v + j*bs2;
31815b57d14SSatish Balay         } else { /* Here a copy is required */
319ab26458aSBarry Smith           if (roworiented) {
320ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
321ab26458aSBarry Smith           } else {
322ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
323abef11f7SSatish Balay           }
32447513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
32547513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
32630793edcSSatish Balay               *barray++  = *value++;
32747513183SBarry Smith             }
32847513183SBarry Smith           }
32930793edcSSatish Balay           barray -=bs2;
33015b57d14SSatish Balay         }
331abef11f7SSatish Balay 
332abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
333abef11f7SSatish Balay           col  = in[j] - cstart;
33430793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
335ab26458aSBarry Smith         }
3365ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
3373a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
3385ef9f2a5SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
339ab26458aSBarry Smith #endif
340ab26458aSBarry Smith         else {
341ab26458aSBarry Smith           if (mat->was_assembled) {
342ab26458aSBarry Smith             if (!baij->colmap) {
343ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
344ab26458aSBarry Smith             }
345a5eb4965SSatish Balay 
3463a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
34748e59246SSatish Balay #if defined (USE_CTABLE)
34848e59246SSatish Balay             if( (TableFind( baij->colmap, in[j] + 1 ) - 1) % bs)
34948e59246SSatish Balay 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
35048e59246SSatish Balay #else
351a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
352a5eb4965SSatish Balay #endif
35348e59246SSatish Balay #endif
35448e59246SSatish Balay #if defined (USE_CTABLE)
35548e59246SSatish Balay 	    col = (TableFind( baij->colmap, in[j] + 1 ) - 1)/bs;
35648e59246SSatish Balay #else
357a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
35848e59246SSatish Balay #endif
359ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
360ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
361ab26458aSBarry Smith               col =  in[j];
362ab26458aSBarry Smith             }
363ab26458aSBarry Smith           }
364ab26458aSBarry Smith           else col = in[j];
36530793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
366ab26458aSBarry Smith         }
367ab26458aSBarry Smith       }
368d64ed03dSBarry Smith     } else {
369ab26458aSBarry Smith       if (!baij->donotstash) {
370ab26458aSBarry Smith         if (roworiented ) {
371abef11f7SSatish Balay           row   = im[i]*bs;
372abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
373abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
374abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
375abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
376abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
377abef11f7SSatish Balay               }
378ab26458aSBarry Smith             }
379ab26458aSBarry Smith           }
380d64ed03dSBarry Smith         } else {
381ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
382abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
383abef11f7SSatish Balay             col   = in[j]*bs;
384abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
385abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
386abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
387ab26458aSBarry Smith               }
388ab26458aSBarry Smith             }
389ab26458aSBarry Smith           }
390abef11f7SSatish Balay         }
391abef11f7SSatish Balay       }
392ab26458aSBarry Smith     }
393ab26458aSBarry Smith   }
3943a40ed3dSBarry Smith   PetscFunctionReturn(0);
395ab26458aSBarry Smith }
3960bdbc534SSatish Balay #define HASH_KEY 0.6180339887
397c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
398c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
399c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
4005615d1e5SSatish Balay #undef __FUNC__
4010bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
4020bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4030bdbc534SSatish Balay {
4040bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4050bdbc534SSatish Balay   int         ierr,i,j,row,col;
4060bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
407c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
408c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
409c2760754SSatish Balay   double      tmp;
410b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,value;
4114a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
4124a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4134a15367fSSatish Balay #endif
4140bdbc534SSatish Balay 
4150bdbc534SSatish Balay   PetscFunctionBegin;
4160bdbc534SSatish Balay 
4170bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
4180bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
4190bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4200bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4210bdbc534SSatish Balay #endif
4220bdbc534SSatish Balay       row = im[i];
423c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4240bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4250bdbc534SSatish Balay         col = in[j];
4260bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4270bdbc534SSatish Balay         /* Look up into the Hash Table */
428c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
429c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4300bdbc534SSatish Balay 
431c2760754SSatish Balay 
432c2760754SSatish Balay         idx = h1;
433187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
434187ce0cbSSatish Balay         insert_ct++;
435187ce0cbSSatish Balay         total_ct++;
436187ce0cbSSatish Balay         if (HT[idx] != key) {
437187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
438187ce0cbSSatish Balay           if (idx == size) {
439187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
440187ce0cbSSatish Balay             if (idx == h1) {
441187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
442187ce0cbSSatish Balay             }
443187ce0cbSSatish Balay           }
444187ce0cbSSatish Balay         }
445187ce0cbSSatish Balay #else
446c2760754SSatish Balay         if (HT[idx] != key) {
447c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
448c2760754SSatish Balay           if (idx == size) {
449c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
450c2760754SSatish Balay             if (idx == h1) {
451c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
452c2760754SSatish Balay             }
453c2760754SSatish Balay           }
454c2760754SSatish Balay         }
455187ce0cbSSatish Balay #endif
456c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
457c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
458c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4590bdbc534SSatish Balay       }
4600bdbc534SSatish Balay     } else {
4610bdbc534SSatish Balay       if (roworiented && !baij->donotstash) {
4620bdbc534SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
4630bdbc534SSatish Balay       } else {
4640bdbc534SSatish Balay         if (!baij->donotstash) {
4650bdbc534SSatish Balay           row = im[i];
4660bdbc534SSatish Balay 	  for ( j=0; j<n; j++ ) {
4670bdbc534SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
4680bdbc534SSatish Balay           }
4690bdbc534SSatish Balay         }
4700bdbc534SSatish Balay       }
4710bdbc534SSatish Balay     }
4720bdbc534SSatish Balay   }
473187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
474187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
475187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
476187ce0cbSSatish Balay #endif
4770bdbc534SSatish Balay   PetscFunctionReturn(0);
4780bdbc534SSatish Balay }
4790bdbc534SSatish Balay 
4800bdbc534SSatish Balay #undef __FUNC__
4810bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4820bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4830bdbc534SSatish Balay {
4840bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4850bdbc534SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
4860bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
487b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
488c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
489c2760754SSatish Balay   double      tmp;
490187ce0cbSSatish Balay   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
4914a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
4924a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4934a15367fSSatish Balay #endif
4940bdbc534SSatish Balay 
495d0a41580SSatish Balay   PetscFunctionBegin;
496d0a41580SSatish Balay 
4970bdbc534SSatish Balay   if (roworiented) {
4980bdbc534SSatish Balay     stepval = (n-1)*bs;
4990bdbc534SSatish Balay   } else {
5000bdbc534SSatish Balay     stepval = (m-1)*bs;
5010bdbc534SSatish Balay   }
5020bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
5030bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
5040bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
5050bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
5060bdbc534SSatish Balay #endif
5070bdbc534SSatish Balay     row   = im[i];
508187ce0cbSSatish Balay     v_t   = v + i*bs2;
509c2760754SSatish Balay     if (row >= rstart && row < rend) {
5100bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
5110bdbc534SSatish Balay         col = in[j];
5120bdbc534SSatish Balay 
5130bdbc534SSatish Balay         /* Look up into the Hash Table */
514c2760754SSatish Balay         key = row*Nbs+col+1;
515c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5160bdbc534SSatish Balay 
517c2760754SSatish Balay         idx = h1;
518187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
519187ce0cbSSatish Balay         total_ct++;
520187ce0cbSSatish Balay         insert_ct++;
521187ce0cbSSatish Balay        if (HT[idx] != key) {
522187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
523187ce0cbSSatish Balay           if (idx == size) {
524187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
525187ce0cbSSatish Balay             if (idx == h1) {
526187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
527187ce0cbSSatish Balay             }
528187ce0cbSSatish Balay           }
529187ce0cbSSatish Balay         }
530187ce0cbSSatish Balay #else
531c2760754SSatish Balay         if (HT[idx] != key) {
532c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
533c2760754SSatish Balay           if (idx == size) {
534c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
535c2760754SSatish Balay             if (idx == h1) {
536c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
537c2760754SSatish Balay             }
538c2760754SSatish Balay           }
539c2760754SSatish Balay         }
540187ce0cbSSatish Balay #endif
541c2760754SSatish Balay         baij_a = HD[idx];
5420bdbc534SSatish Balay         if (roworiented) {
543c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
544187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
545187ce0cbSSatish Balay           value = v_t;
546187ce0cbSSatish Balay           v_t  += bs;
547fef45726SSatish Balay           if (addv == ADD_VALUES) {
548c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
549c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
550fef45726SSatish Balay                 baij_a[jj]  += *value++;
551b4cc0f5aSSatish Balay               }
552b4cc0f5aSSatish Balay             }
553fef45726SSatish Balay           } else {
554c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
555c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
556fef45726SSatish Balay                 baij_a[jj]  = *value++;
557fef45726SSatish Balay               }
558fef45726SSatish Balay             }
559fef45726SSatish Balay           }
5600bdbc534SSatish Balay         } else {
5610bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
562fef45726SSatish Balay           if (addv == ADD_VALUES) {
563b4cc0f5aSSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
5640bdbc534SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
565fef45726SSatish Balay                 baij_a[jj]  += *value++;
566fef45726SSatish Balay               }
567fef45726SSatish Balay             }
568fef45726SSatish Balay           } else {
569fef45726SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
570fef45726SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
571fef45726SSatish Balay                 baij_a[jj]  = *value++;
572fef45726SSatish Balay               }
573b4cc0f5aSSatish Balay             }
5740bdbc534SSatish Balay           }
5750bdbc534SSatish Balay         }
5760bdbc534SSatish Balay       }
5770bdbc534SSatish Balay     } else {
5780bdbc534SSatish Balay       if (!baij->donotstash) {
5790bdbc534SSatish Balay         if (roworiented ) {
5800bdbc534SSatish Balay           row   = im[i]*bs;
5810bdbc534SSatish Balay           value = v + i*(stepval+bs)*bs;
5820bdbc534SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
5830bdbc534SSatish Balay             for ( k=0; k<n; k++ ) {
5840bdbc534SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
5850bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5860bdbc534SSatish Balay               }
5870bdbc534SSatish Balay             }
5880bdbc534SSatish Balay           }
5890bdbc534SSatish Balay         } else {
5900bdbc534SSatish Balay           for ( j=0; j<n; j++ ) {
5910bdbc534SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
5920bdbc534SSatish Balay             col   = in[j]*bs;
5930bdbc534SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
5940bdbc534SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
5950bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5960bdbc534SSatish Balay               }
5970bdbc534SSatish Balay             }
5980bdbc534SSatish Balay           }
5990bdbc534SSatish Balay         }
6000bdbc534SSatish Balay       }
6010bdbc534SSatish Balay     }
6020bdbc534SSatish Balay   }
603187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
604187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
605187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
606187ce0cbSSatish Balay #endif
6070bdbc534SSatish Balay   PetscFunctionReturn(0);
6080bdbc534SSatish Balay }
609133cdb44SSatish Balay 
6100bdbc534SSatish Balay #undef __FUNC__
6115615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
612ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
613d6de1c52SSatish Balay {
614d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
615d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
61648e59246SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
617d6de1c52SSatish Balay 
618133cdb44SSatish Balay   PetscFunctionBegin;
619d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
620a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
621a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
622d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
623d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
624d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
625a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
626a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
627d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
628d6de1c52SSatish Balay           col = idxn[j] - bscstart;
62998dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
630d64ed03dSBarry Smith         } else {
631905e6a2fSBarry Smith           if (!baij->colmap) {
632905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
633905e6a2fSBarry Smith           }
63448e59246SSatish Balay #if defined (USE_CTABLE)
63548e59246SSatish Balay           data = TableFind( baij->colmap, idxn[j]/bs + 1 ) - 1;
63648e59246SSatish Balay #else
63748e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
63848e59246SSatish Balay #endif
63948e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
640d9d09a02SSatish Balay           else {
64148e59246SSatish Balay             col  = data + idxn[j]%bs;
64298dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
643d6de1c52SSatish Balay           }
644d6de1c52SSatish Balay         }
645d6de1c52SSatish Balay       }
646d64ed03dSBarry Smith     } else {
647a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
648d6de1c52SSatish Balay     }
649d6de1c52SSatish Balay   }
6503a40ed3dSBarry Smith  PetscFunctionReturn(0);
651d6de1c52SSatish Balay }
652d6de1c52SSatish Balay 
6535615d1e5SSatish Balay #undef __FUNC__
6545615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
655ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
656d6de1c52SSatish Balay {
657d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
658d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
659acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
660d6de1c52SSatish Balay   double     sum = 0.0;
661d6de1c52SSatish Balay   Scalar     *v;
662d6de1c52SSatish Balay 
663d64ed03dSBarry Smith   PetscFunctionBegin;
664d6de1c52SSatish Balay   if (baij->size == 1) {
665d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
666d6de1c52SSatish Balay   } else {
667d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
668d6de1c52SSatish Balay       v = amat->a;
669d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
6703a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
671e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
672d6de1c52SSatish Balay #else
673d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
674d6de1c52SSatish Balay #endif
675d6de1c52SSatish Balay       }
676d6de1c52SSatish Balay       v = bmat->a;
677d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
6783a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
679e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
680d6de1c52SSatish Balay #else
681d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
682d6de1c52SSatish Balay #endif
683d6de1c52SSatish Balay       }
684ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
685d6de1c52SSatish Balay       *norm = sqrt(*norm);
686d64ed03dSBarry Smith     } else {
687e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
688d6de1c52SSatish Balay     }
689d64ed03dSBarry Smith   }
6903a40ed3dSBarry Smith   PetscFunctionReturn(0);
691d6de1c52SSatish Balay }
69257b952d6SSatish Balay 
6935615d1e5SSatish Balay #undef __FUNC__
6945615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
695ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
69657b952d6SSatish Balay {
69757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
69857b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
69957b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
70057b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
70157b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
70257b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
70357b952d6SSatish Balay   InsertMode  addv;
70457b952d6SSatish Balay   Scalar      *rvalues,*svalues;
70557b952d6SSatish Balay 
706d64ed03dSBarry Smith   PetscFunctionBegin;
707570da906SBarry Smith   if (baij->donotstash) {
708570da906SBarry Smith     baij->svalues    = 0; baij->rvalues    = 0;
709570da906SBarry Smith     baij->nsends     = 0; baij->nrecvs     = 0;
710570da906SBarry Smith     baij->send_waits = 0; baij->recv_waits = 0;
711570da906SBarry Smith     baij->rmax       = 0;
712570da906SBarry Smith     PetscFunctionReturn(0);
713570da906SBarry Smith   }
714570da906SBarry Smith 
71557b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
716ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
71757b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
718a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
71957b952d6SSatish Balay   }
720e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
72157b952d6SSatish Balay 
72257b952d6SSatish Balay   /*  first count number of contributors to each processor */
72357b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
72457b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
72557b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
72657b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
72757b952d6SSatish Balay     idx = baij->stash.idx[i];
72857b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
72957b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
73057b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
73157b952d6SSatish Balay       }
73257b952d6SSatish Balay     }
73357b952d6SSatish Balay   }
73457b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
73557b952d6SSatish Balay 
73657b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
73757b952d6SSatish Balay   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
738ca161407SBarry Smith   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
73957b952d6SSatish Balay   nreceives = work[rank];
740ca161407SBarry Smith   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
74157b952d6SSatish Balay   nmax      = work[rank];
74257b952d6SSatish Balay   PetscFree(work);
74357b952d6SSatish Balay 
74457b952d6SSatish Balay   /* post receives:
74557b952d6SSatish Balay        1) each message will consist of ordered pairs
74657b952d6SSatish Balay      (global index,value) we store the global index as a double
74757b952d6SSatish Balay      to simplify the message passing.
74857b952d6SSatish Balay        2) since we don't know how long each individual message is we
74957b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
75057b952d6SSatish Balay      this is a lot of wasted space.
75157b952d6SSatish Balay 
75257b952d6SSatish Balay 
75357b952d6SSatish Balay        This could be done better.
75457b952d6SSatish Balay   */
755f8abc2e8SBarry Smith   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
756f8abc2e8SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
75757b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
758ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
75957b952d6SSatish Balay   }
76057b952d6SSatish Balay 
76157b952d6SSatish Balay   /* do sends:
76257b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
76357b952d6SSatish Balay          the ith processor
76457b952d6SSatish Balay   */
76557b952d6SSatish Balay   svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
766d64ed03dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
76757b952d6SSatish Balay   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
76857b952d6SSatish Balay   starts[0] = 0;
76957b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
77057b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
77157b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
77257b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
77357b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
77457b952d6SSatish Balay   }
77557b952d6SSatish Balay   PetscFree(owner);
77657b952d6SSatish Balay   starts[0] = 0;
77757b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
77857b952d6SSatish Balay   count = 0;
77957b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
78057b952d6SSatish Balay     if (procs[i]) {
781ca161407SBarry Smith       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
78257b952d6SSatish Balay     }
78357b952d6SSatish Balay   }
78457b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
78557b952d6SSatish Balay 
78657b952d6SSatish Balay   /* Free cache space */
78710a665d1SBarry Smith   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
78857b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
78957b952d6SSatish Balay 
79057b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
79157b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
79257b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
79357b952d6SSatish Balay   baij->rmax       = nmax;
79457b952d6SSatish Balay 
7953a40ed3dSBarry Smith   PetscFunctionReturn(0);
79657b952d6SSatish Balay }
797bd7f49f5SSatish Balay 
798fef45726SSatish Balay /*
799fef45726SSatish Balay   Creates the hash table, and sets the table
800fef45726SSatish Balay   This table is created only once.
801fef45726SSatish Balay   If new entried need to be added to the matrix
802fef45726SSatish Balay   then the hash table has to be destroyed and
803fef45726SSatish Balay   recreated.
804fef45726SSatish Balay */
805fef45726SSatish Balay #undef __FUNC__
806fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
807d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
808596b8d2eSBarry Smith {
809596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
810596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
811596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
8120bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
8134a15367fSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart;
814187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
815fef45726SSatish Balay   int         *HT,key;
8160bdbc534SSatish Balay   Scalar      **HD;
817c2760754SSatish Balay   double      tmp;
8184a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
8194a15367fSSatish Balay   int         ct=0,max=0;
8204a15367fSSatish Balay #endif
821fef45726SSatish Balay 
822d64ed03dSBarry Smith   PetscFunctionBegin;
8230bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
8240bdbc534SSatish Balay   size = baij->ht_size;
825fef45726SSatish Balay 
8260bdbc534SSatish Balay   if (baij->ht) {
8270bdbc534SSatish Balay     PetscFunctionReturn(0);
828596b8d2eSBarry Smith   }
8290bdbc534SSatish Balay 
830fef45726SSatish Balay   /* Allocate Memory for Hash Table */
831b9e4cc15SSatish Balay   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
832b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
833b9e4cc15SSatish Balay   HD = baij->hd;
834a07cd24cSSatish Balay   HT = baij->ht;
835b9e4cc15SSatish Balay 
836b9e4cc15SSatish Balay 
837c2760754SSatish Balay   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
8380bdbc534SSatish Balay 
839596b8d2eSBarry Smith 
840596b8d2eSBarry Smith   /* Loop Over A */
8410bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
842596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8430bdbc534SSatish Balay       row = i+rstart;
8440bdbc534SSatish Balay       col = aj[j]+cstart;
845596b8d2eSBarry Smith 
846187ce0cbSSatish Balay       key = row*Nbs + col + 1;
847c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8480bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
8490bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8500bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8510bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
852596b8d2eSBarry Smith           break;
853187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
854187ce0cbSSatish Balay         } else {
855187ce0cbSSatish Balay           ct++;
856187ce0cbSSatish Balay #endif
857596b8d2eSBarry Smith         }
858187ce0cbSSatish Balay       }
859187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
860187ce0cbSSatish Balay       if (k> max) max = k;
861187ce0cbSSatish Balay #endif
862596b8d2eSBarry Smith     }
863596b8d2eSBarry Smith   }
864596b8d2eSBarry Smith   /* Loop Over B */
8650bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
866596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
8670bdbc534SSatish Balay       row = i+rstart;
8680bdbc534SSatish Balay       col = garray[bj[j]];
869187ce0cbSSatish Balay       key = row*Nbs + col + 1;
870c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8710bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
8720bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8730bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8740bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
875596b8d2eSBarry Smith           break;
876187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
877187ce0cbSSatish Balay         } else {
878187ce0cbSSatish Balay           ct++;
879187ce0cbSSatish Balay #endif
880596b8d2eSBarry Smith         }
881187ce0cbSSatish Balay       }
882187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
883187ce0cbSSatish Balay       if (k> max) max = k;
884187ce0cbSSatish Balay #endif
885596b8d2eSBarry Smith     }
886596b8d2eSBarry Smith   }
887596b8d2eSBarry Smith 
888596b8d2eSBarry Smith   /* Print Summary */
889187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
890c2760754SSatish Balay   for ( i=0,j=0; i<size; i++)
891596b8d2eSBarry Smith     if (HT[i]) {j++;}
892187ce0cbSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
893187ce0cbSSatish Balay            (j== 0)? 0.0:((double)(ct+j))/j,max);
894187ce0cbSSatish Balay #endif
8953a40ed3dSBarry Smith   PetscFunctionReturn(0);
896596b8d2eSBarry Smith }
89757b952d6SSatish Balay 
8985615d1e5SSatish Balay #undef __FUNC__
8995615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
900ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
90157b952d6SSatish Balay {
90257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
90357b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
90457b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
905b7029e64SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
90657b952d6SSatish Balay   Scalar      *values,val;
907e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
90857b952d6SSatish Balay 
909d64ed03dSBarry Smith   PetscFunctionBegin;
91057b952d6SSatish Balay   /*  wait on receives */
91157b952d6SSatish Balay   while (count) {
912ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
91357b952d6SSatish Balay     /* unpack receives into our local space */
91457b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
915ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
91657b952d6SSatish Balay     n = n/3;
91757b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
91857b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
91957b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
92057b952d6SSatish Balay       val = values[3*i+2];
92157b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
92257b952d6SSatish Balay         col -= baij->cstart*bs;
9236fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
924d64ed03dSBarry Smith       } else {
92557b952d6SSatish Balay         if (mat->was_assembled) {
926905e6a2fSBarry Smith           if (!baij->colmap) {
927905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
928905e6a2fSBarry Smith           }
92948e59246SSatish Balay #if defined (USE_CTABLE)
93048e59246SSatish Balay 	  col = TableFind( baij->colmap, col/bs + 1 ) - 1 + col%bs;
93148e59246SSatish Balay #else
932a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
93348e59246SSatish Balay #endif
93457b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
93557b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
93657b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
93757b952d6SSatish Balay           }
93857b952d6SSatish Balay         }
9396fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
94057b952d6SSatish Balay       }
94157b952d6SSatish Balay     }
94257b952d6SSatish Balay     count--;
94357b952d6SSatish Balay   }
944570da906SBarry Smith   if (baij->recv_waits) PetscFree(baij->recv_waits);
945570da906SBarry Smith   if (baij->rvalues)    PetscFree(baij->rvalues);
94657b952d6SSatish Balay 
94757b952d6SSatish Balay   /* wait on sends */
94857b952d6SSatish Balay   if (baij->nsends) {
949d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
950ca161407SBarry Smith     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
95157b952d6SSatish Balay     PetscFree(send_status);
95257b952d6SSatish Balay   }
953570da906SBarry Smith   if (baij->send_waits) PetscFree(baij->send_waits);
954570da906SBarry Smith   if (baij->svalues)    PetscFree(baij->svalues);
95557b952d6SSatish Balay 
95657b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
95757b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
95857b952d6SSatish Balay 
95957b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
96057b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
9616e713f22SBarry Smith   /*
9626e713f22SBarry Smith      if nonzero structure of submatrix B cannot change then we know that
9636e713f22SBarry Smith      no processor disassembled thus we can skip this stuff
9646e713f22SBarry Smith   */
9656e713f22SBarry Smith   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
966ca161407SBarry Smith     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
96757b952d6SSatish Balay     if (mat->was_assembled && !other_disassembled) {
96857b952d6SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
96957b952d6SSatish Balay     }
9706e713f22SBarry Smith   }
97157b952d6SSatish Balay 
9726d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
97357b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
97457b952d6SSatish Balay   }
97557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
97657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
97757b952d6SSatish Balay 
978187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
979187ce0cbSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
980187ce0cbSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
981187ce0cbSSatish Balay              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
982187ce0cbSSatish Balay     baij->ht_total_ct  = 0;
983187ce0cbSSatish Balay     baij->ht_insert_ct = 0;
984187ce0cbSSatish Balay   }
985187ce0cbSSatish Balay #endif
986133cdb44SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
987133cdb44SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
988f830108cSBarry Smith     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
989f830108cSBarry Smith     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
990bd7f49f5SSatish Balay   }
991187ce0cbSSatish Balay 
99257b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
9933a40ed3dSBarry Smith   PetscFunctionReturn(0);
99457b952d6SSatish Balay }
99557b952d6SSatish Balay 
9965615d1e5SSatish Balay #undef __FUNC__
9975615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
99857b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
99957b952d6SSatish Balay {
100057b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
100157b952d6SSatish Balay   int          ierr;
100257b952d6SSatish Balay 
1003d64ed03dSBarry Smith   PetscFunctionBegin;
100457b952d6SSatish Balay   if (baij->size == 1) {
100557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1006a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
10073a40ed3dSBarry Smith   PetscFunctionReturn(0);
100857b952d6SSatish Balay }
100957b952d6SSatish Balay 
10105615d1e5SSatish Balay #undef __FUNC__
10117b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
10127b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
101357b952d6SSatish Balay {
101457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
101577ed5343SBarry Smith   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
101657b952d6SSatish Balay   FILE         *fd;
101757b952d6SSatish Balay   ViewerType   vtype;
101857b952d6SSatish Balay 
1019d64ed03dSBarry Smith   PetscFunctionBegin;
102057b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
10213f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
102257b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
1023639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
10244e220ebcSLois Curfman McInnes       MatInfo info;
102557b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
102657b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
10274e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
102857b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
102957b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
10304e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10314e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
10324e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
10334e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
10344e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
10354e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
103657b952d6SSatish Balay       fflush(fd);
103757b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
103857b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
10393a40ed3dSBarry Smith       PetscFunctionReturn(0);
1040d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1041bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
10423a40ed3dSBarry Smith       PetscFunctionReturn(0);
104357b952d6SSatish Balay     }
104457b952d6SSatish Balay   }
104557b952d6SSatish Balay 
10463f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
104757b952d6SSatish Balay     Draw       draw;
104857b952d6SSatish Balay     PetscTruth isnull;
104977ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
10503a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
105157b952d6SSatish Balay   }
105257b952d6SSatish Balay 
105357b952d6SSatish Balay   if (size == 1) {
105457b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1055d64ed03dSBarry Smith   } else {
105657b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
105757b952d6SSatish Balay     Mat         A;
105857b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
105940011551SBarry Smith     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
106057b952d6SSatish Balay     int         mbs = baij->mbs;
106157b952d6SSatish Balay     Scalar      *a;
106257b952d6SSatish Balay 
106357b952d6SSatish Balay     if (!rank) {
106455843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1065d64ed03dSBarry Smith     } else {
106655843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
106757b952d6SSatish Balay     }
106857b952d6SSatish Balay     PLogObjectParent(mat,A);
106957b952d6SSatish Balay 
107057b952d6SSatish Balay     /* copy over the A part */
107157b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->A->data;
107257b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
107357b952d6SSatish Balay     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
107457b952d6SSatish Balay 
107557b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
107657b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
107757b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
107857b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
107957b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
108057b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1081cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1082cee3aa6bSSatish Balay           col++; a += bs;
108357b952d6SSatish Balay         }
108457b952d6SSatish Balay       }
108557b952d6SSatish Balay     }
108657b952d6SSatish Balay     /* copy over the B part */
108757b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->B->data;
108857b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
108957b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
109057b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
109157b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
109257b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
109357b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
109457b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1095cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1096cee3aa6bSSatish Balay           col++; a += bs;
109757b952d6SSatish Balay         }
109857b952d6SSatish Balay       }
109957b952d6SSatish Balay     }
110057b952d6SSatish Balay     PetscFree(rvals);
11016d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
11026d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
110355843e3eSBarry Smith     /*
110455843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
110555843e3eSBarry Smith        synchronized across all processors that share the Draw object
110655843e3eSBarry Smith     */
11073f1db9ecSBarry Smith     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
110857b952d6SSatish Balay       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
110957b952d6SSatish Balay     }
111057b952d6SSatish Balay     ierr = MatDestroy(A); CHKERRQ(ierr);
111157b952d6SSatish Balay   }
11123a40ed3dSBarry Smith   PetscFunctionReturn(0);
111357b952d6SSatish Balay }
111457b952d6SSatish Balay 
111557b952d6SSatish Balay 
111657b952d6SSatish Balay 
11175615d1e5SSatish Balay #undef __FUNC__
11185615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
1119e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
112057b952d6SSatish Balay {
112157b952d6SSatish Balay   int         ierr;
112257b952d6SSatish Balay   ViewerType  vtype;
112357b952d6SSatish Balay 
1124d64ed03dSBarry Smith   PetscFunctionBegin;
112557b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
11263f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
11277b2a1423SBarry Smith       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
11287b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
11293f1db9ecSBarry Smith   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
11303a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
11315cd90555SBarry Smith   } else {
11325cd90555SBarry Smith     SETERRQ(1,1,"Viewer type not supported by PETSc object");
113357b952d6SSatish Balay   }
11343a40ed3dSBarry Smith   PetscFunctionReturn(0);
113557b952d6SSatish Balay }
113657b952d6SSatish Balay 
11375615d1e5SSatish Balay #undef __FUNC__
11385615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1139e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
114079bdfe76SSatish Balay {
114179bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
114279bdfe76SSatish Balay   int         ierr;
114379bdfe76SSatish Balay 
1144d64ed03dSBarry Smith   PetscFunctionBegin;
114598dd23e9SBarry Smith   if (--mat->refct > 0) PetscFunctionReturn(0);
114698dd23e9SBarry Smith 
114798dd23e9SBarry Smith   if (mat->mapping) {
114898dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
114998dd23e9SBarry Smith   }
115098dd23e9SBarry Smith   if (mat->bmapping) {
115198dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
115298dd23e9SBarry Smith   }
115361b13de0SBarry Smith   if (mat->rmap) {
115461b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
115561b13de0SBarry Smith   }
115661b13de0SBarry Smith   if (mat->cmap) {
115761b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
115861b13de0SBarry Smith   }
11593a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
1160e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
116179bdfe76SSatish Balay #endif
116279bdfe76SSatish Balay 
116383e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
116479bdfe76SSatish Balay   PetscFree(baij->rowners);
116579bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
116679bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
116748e59246SSatish Balay #if defined (USE_CTABLE)
116848e59246SSatish Balay   if (baij->colmap) TableDelete(baij->colmap);
116948e59246SSatish Balay #else
117079bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
117148e59246SSatish Balay #endif
117279bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
117379bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
117479bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
117579bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
117630793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
1177b9e4cc15SSatish Balay   if (baij->hd) PetscFree(baij->hd);
117879bdfe76SSatish Balay   PetscFree(baij);
117979bdfe76SSatish Balay   PLogObjectDestroy(mat);
118079bdfe76SSatish Balay   PetscHeaderDestroy(mat);
11813a40ed3dSBarry Smith   PetscFunctionReturn(0);
118279bdfe76SSatish Balay }
118379bdfe76SSatish Balay 
11845615d1e5SSatish Balay #undef __FUNC__
11855615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1186ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1187cee3aa6bSSatish Balay {
1188cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
118947b4a8eaSLois Curfman McInnes   int         ierr, nt;
1190cee3aa6bSSatish Balay 
1191d64ed03dSBarry Smith   PetscFunctionBegin;
1192e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
119347b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1194a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
119547b4a8eaSLois Curfman McInnes   }
1196e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
119747b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1198a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
119947b4a8eaSLois Curfman McInnes   }
120043a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1201f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
120243a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1203f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
120443a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
12053a40ed3dSBarry Smith   PetscFunctionReturn(0);
1206cee3aa6bSSatish Balay }
1207cee3aa6bSSatish Balay 
12085615d1e5SSatish Balay #undef __FUNC__
12095615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1210ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1211cee3aa6bSSatish Balay {
1212cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1213cee3aa6bSSatish Balay   int        ierr;
1214d64ed03dSBarry Smith 
1215d64ed03dSBarry Smith   PetscFunctionBegin;
121643a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1217f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
121843a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1219f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
12203a40ed3dSBarry Smith   PetscFunctionReturn(0);
1221cee3aa6bSSatish Balay }
1222cee3aa6bSSatish Balay 
12235615d1e5SSatish Balay #undef __FUNC__
12245615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1225ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1226cee3aa6bSSatish Balay {
1227cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1228cee3aa6bSSatish Balay   int         ierr;
1229cee3aa6bSSatish Balay 
1230d64ed03dSBarry Smith   PetscFunctionBegin;
1231cee3aa6bSSatish Balay   /* do nondiagonal part */
1232f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1233cee3aa6bSSatish Balay   /* send it on its way */
1234537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1235cee3aa6bSSatish Balay   /* do local part */
1236f830108cSBarry Smith   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1237cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1238cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1239cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1240639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12413a40ed3dSBarry Smith   PetscFunctionReturn(0);
1242cee3aa6bSSatish Balay }
1243cee3aa6bSSatish Balay 
12445615d1e5SSatish Balay #undef __FUNC__
12455615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1246ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1247cee3aa6bSSatish Balay {
1248cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1249cee3aa6bSSatish Balay   int         ierr;
1250cee3aa6bSSatish Balay 
1251d64ed03dSBarry Smith   PetscFunctionBegin;
1252cee3aa6bSSatish Balay   /* do nondiagonal part */
1253f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1254cee3aa6bSSatish Balay   /* send it on its way */
1255537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1256cee3aa6bSSatish Balay   /* do local part */
1257f830108cSBarry Smith   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1258cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1259cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1260cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1261537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
12623a40ed3dSBarry Smith   PetscFunctionReturn(0);
1263cee3aa6bSSatish Balay }
1264cee3aa6bSSatish Balay 
1265cee3aa6bSSatish Balay /*
1266cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1267cee3aa6bSSatish Balay    diagonal block
1268cee3aa6bSSatish Balay */
12695615d1e5SSatish Balay #undef __FUNC__
12705615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1271ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1272cee3aa6bSSatish Balay {
1273cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
12743a40ed3dSBarry Smith   int         ierr;
1275d64ed03dSBarry Smith 
1276d64ed03dSBarry Smith   PetscFunctionBegin;
1277a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
12783a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
12793a40ed3dSBarry Smith   PetscFunctionReturn(0);
1280cee3aa6bSSatish Balay }
1281cee3aa6bSSatish Balay 
12825615d1e5SSatish Balay #undef __FUNC__
12835615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1284ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1285cee3aa6bSSatish Balay {
1286cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1287cee3aa6bSSatish Balay   int         ierr;
1288d64ed03dSBarry Smith 
1289d64ed03dSBarry Smith   PetscFunctionBegin;
1290cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1291cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
12923a40ed3dSBarry Smith   PetscFunctionReturn(0);
1293cee3aa6bSSatish Balay }
1294026e39d0SSatish Balay 
12955615d1e5SSatish Balay #undef __FUNC__
12965615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1297ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
129857b952d6SSatish Balay {
129957b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1300d64ed03dSBarry Smith 
1301d64ed03dSBarry Smith   PetscFunctionBegin;
1302bd7f49f5SSatish Balay   if (m) *m = mat->M;
1303bd7f49f5SSatish Balay   if (n) *n = mat->N;
13043a40ed3dSBarry Smith   PetscFunctionReturn(0);
130557b952d6SSatish Balay }
130657b952d6SSatish Balay 
13075615d1e5SSatish Balay #undef __FUNC__
13085615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1309ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
131057b952d6SSatish Balay {
131157b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1312d64ed03dSBarry Smith 
1313d64ed03dSBarry Smith   PetscFunctionBegin;
1314f830108cSBarry Smith   *m = mat->m; *n = mat->n;
13153a40ed3dSBarry Smith   PetscFunctionReturn(0);
131657b952d6SSatish Balay }
131757b952d6SSatish Balay 
13185615d1e5SSatish Balay #undef __FUNC__
13195615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1320ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
132157b952d6SSatish Balay {
132257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1323d64ed03dSBarry Smith 
1324d64ed03dSBarry Smith   PetscFunctionBegin;
132557b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
13263a40ed3dSBarry Smith   PetscFunctionReturn(0);
132757b952d6SSatish Balay }
132857b952d6SSatish Balay 
1329acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1330acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1331acdf5bf4SSatish Balay 
13325615d1e5SSatish Balay #undef __FUNC__
13335615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1334acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1335acdf5bf4SSatish Balay {
1336acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1337acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1338acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1339d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1340d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1341acdf5bf4SSatish Balay 
1342d64ed03dSBarry Smith   PetscFunctionBegin;
1343a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1344acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1345acdf5bf4SSatish Balay 
1346acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1347acdf5bf4SSatish Balay     /*
1348acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1349acdf5bf4SSatish Balay     */
1350acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1351bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1352bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1353acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1354acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1355acdf5bf4SSatish Balay     }
1356acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1357acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1358acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1359acdf5bf4SSatish Balay   }
1360acdf5bf4SSatish Balay 
1361a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1362d9d09a02SSatish Balay   lrow = row - brstart;
1363acdf5bf4SSatish Balay 
1364acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1365acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1366acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1367f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1368f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1369acdf5bf4SSatish Balay   nztot = nzA + nzB;
1370acdf5bf4SSatish Balay 
1371acdf5bf4SSatish Balay   cmap  = mat->garray;
1372acdf5bf4SSatish Balay   if (v  || idx) {
1373acdf5bf4SSatish Balay     if (nztot) {
1374acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1375acdf5bf4SSatish Balay       int imark = -1;
1376acdf5bf4SSatish Balay       if (v) {
1377acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1378acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1379d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1380acdf5bf4SSatish Balay           else break;
1381acdf5bf4SSatish Balay         }
1382acdf5bf4SSatish Balay         imark = i;
1383acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1384acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1385acdf5bf4SSatish Balay       }
1386acdf5bf4SSatish Balay       if (idx) {
1387acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1388acdf5bf4SSatish Balay         if (imark > -1) {
1389acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1390bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1391acdf5bf4SSatish Balay           }
1392acdf5bf4SSatish Balay         } else {
1393acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1394d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1395d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1396acdf5bf4SSatish Balay             else break;
1397acdf5bf4SSatish Balay           }
1398acdf5bf4SSatish Balay           imark = i;
1399acdf5bf4SSatish Balay         }
1400d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1401d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1402acdf5bf4SSatish Balay       }
1403d64ed03dSBarry Smith     } else {
1404d212a18eSSatish Balay       if (idx) *idx = 0;
1405d212a18eSSatish Balay       if (v)   *v   = 0;
1406d212a18eSSatish Balay     }
1407acdf5bf4SSatish Balay   }
1408acdf5bf4SSatish Balay   *nz = nztot;
1409f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1410f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
14113a40ed3dSBarry Smith   PetscFunctionReturn(0);
1412acdf5bf4SSatish Balay }
1413acdf5bf4SSatish Balay 
14145615d1e5SSatish Balay #undef __FUNC__
14155615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1416acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1417acdf5bf4SSatish Balay {
1418acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1419d64ed03dSBarry Smith 
1420d64ed03dSBarry Smith   PetscFunctionBegin;
1421acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1422a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1423acdf5bf4SSatish Balay   }
1424acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
14253a40ed3dSBarry Smith   PetscFunctionReturn(0);
1426acdf5bf4SSatish Balay }
1427acdf5bf4SSatish Balay 
14285615d1e5SSatish Balay #undef __FUNC__
14295615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1430ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
14315a838052SSatish Balay {
14325a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1433d64ed03dSBarry Smith 
1434d64ed03dSBarry Smith   PetscFunctionBegin;
14355a838052SSatish Balay   *bs = baij->bs;
14363a40ed3dSBarry Smith   PetscFunctionReturn(0);
14375a838052SSatish Balay }
14385a838052SSatish Balay 
14395615d1e5SSatish Balay #undef __FUNC__
14405615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1441ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
144258667388SSatish Balay {
144358667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
144458667388SSatish Balay   int         ierr;
1445d64ed03dSBarry Smith 
1446d64ed03dSBarry Smith   PetscFunctionBegin;
144758667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
144858667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
14493a40ed3dSBarry Smith   PetscFunctionReturn(0);
145058667388SSatish Balay }
14510ac07820SSatish Balay 
14525615d1e5SSatish Balay #undef __FUNC__
14535615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1454ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14550ac07820SSatish Balay {
14564e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
14574e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
14587d57db60SLois Curfman McInnes   int         ierr;
14597d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
14600ac07820SSatish Balay 
1461d64ed03dSBarry Smith   PetscFunctionBegin;
14624e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
14634e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
14640e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1465de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14664e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
14670e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1468de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14690ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14704e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14714e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14724e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14734e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14744e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14750ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1476f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
14774e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14784e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14794e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14804e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14814e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14820ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1483f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
14844e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14854e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14864e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14874e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14884e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14890ac07820SSatish Balay   }
14905a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
14915a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
14925a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
14935a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
14944e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14954e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14964e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14973a40ed3dSBarry Smith   PetscFunctionReturn(0);
14980ac07820SSatish Balay }
14990ac07820SSatish Balay 
15005615d1e5SSatish Balay #undef __FUNC__
15015615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1502ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
150358667388SSatish Balay {
150458667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
150558667388SSatish Balay 
1506d64ed03dSBarry Smith   PetscFunctionBegin;
150758667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
150858667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
15096da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1510c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
1511*4787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1512*4787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1513b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1514b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1515b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1516aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
151758667388SSatish Balay         MatSetOption(a->A,op);
151858667388SSatish Balay         MatSetOption(a->B,op);
1519b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
15206da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
152158667388SSatish Balay              op == MAT_SYMMETRIC ||
152258667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1523b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
1524b51ba29fSSatish Balay              op == MAT_USE_HASH_TABLE)
152558667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
152658667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
152758667388SSatish Balay     a->roworiented = 0;
152858667388SSatish Balay     MatSetOption(a->A,op);
152958667388SSatish Balay     MatSetOption(a->B,op);
15302b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
153190f02eecSBarry Smith     a->donotstash = 1;
1532d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1533d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1534133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1535133cdb44SSatish Balay     a->ht_flag = 1;
1536d64ed03dSBarry Smith   } else {
1537d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1538d64ed03dSBarry Smith   }
15393a40ed3dSBarry Smith   PetscFunctionReturn(0);
154058667388SSatish Balay }
154158667388SSatish Balay 
15425615d1e5SSatish Balay #undef __FUNC__
15435615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1544ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15450ac07820SSatish Balay {
15460ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
15470ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15480ac07820SSatish Balay   Mat        B;
154940011551SBarry Smith   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
15500ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
15510ac07820SSatish Balay   Scalar     *a;
15520ac07820SSatish Balay 
1553d64ed03dSBarry Smith   PetscFunctionBegin;
1554a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
15550ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
15560ac07820SSatish Balay   CHKERRQ(ierr);
15570ac07820SSatish Balay 
15580ac07820SSatish Balay   /* copy over the A part */
15590ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
15600ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15610ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
15620ac07820SSatish Balay 
15630ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
15640ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15650ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
15660ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
15670ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
15680ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
15690ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15700ac07820SSatish Balay         col++; a += bs;
15710ac07820SSatish Balay       }
15720ac07820SSatish Balay     }
15730ac07820SSatish Balay   }
15740ac07820SSatish Balay   /* copy over the B part */
15750ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
15760ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15770ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
15780ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15790ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
15800ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
15810ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15820ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
15830ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15840ac07820SSatish Balay         col++; a += bs;
15850ac07820SSatish Balay       }
15860ac07820SSatish Balay     }
15870ac07820SSatish Balay   }
15880ac07820SSatish Balay   PetscFree(rvals);
15890ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15900ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15910ac07820SSatish Balay 
15920ac07820SSatish Balay   if (matout != PETSC_NULL) {
15930ac07820SSatish Balay     *matout = B;
15940ac07820SSatish Balay   } else {
1595f830108cSBarry Smith     PetscOps *Abops;
1596cc2dc46cSBarry Smith     MatOps   Aops;
1597f830108cSBarry Smith 
15980ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
15990ac07820SSatish Balay     PetscFree(baij->rowners);
16000ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
16010ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1602b1fc9764SSatish Balay #if defined (USE_CTABLE)
1603b1fc9764SSatish Balay     if (baij->colmap) TableDelete(baij->colmap);
1604b1fc9764SSatish Balay #else
16050ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
1606b1fc9764SSatish Balay #endif
16070ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
16080ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
16090ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
16100ac07820SSatish Balay     PetscFree(baij);
1611f830108cSBarry Smith 
1612f830108cSBarry Smith     /*
1613f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1614f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1615f830108cSBarry Smith       else from C.
1616f830108cSBarry Smith     */
1617f830108cSBarry Smith     Abops = A->bops;
1618f830108cSBarry Smith     Aops  = A->ops;
1619f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1620f830108cSBarry Smith     A->bops = Abops;
1621f830108cSBarry Smith     A->ops  = Aops;
1622f830108cSBarry Smith 
16230ac07820SSatish Balay     PetscHeaderDestroy(B);
16240ac07820SSatish Balay   }
16253a40ed3dSBarry Smith   PetscFunctionReturn(0);
16260ac07820SSatish Balay }
16270e95ebc0SSatish Balay 
16285615d1e5SSatish Balay #undef __FUNC__
16295615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
16300e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
16310e95ebc0SSatish Balay {
16320e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
16330e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
16340e95ebc0SSatish Balay   int ierr,s1,s2,s3;
16350e95ebc0SSatish Balay 
1636d64ed03dSBarry Smith   PetscFunctionBegin;
16370e95ebc0SSatish Balay   if (ll)  {
16380e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
16390e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1640a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
16410e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
16420e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
16430e95ebc0SSatish Balay   }
1644a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
16453a40ed3dSBarry Smith   PetscFunctionReturn(0);
16460e95ebc0SSatish Balay }
16470e95ebc0SSatish Balay 
16485615d1e5SSatish Balay #undef __FUNC__
16495615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1650ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
16510ac07820SSatish Balay {
16520ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
16530ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1654a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
16550ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16560ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1657a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16580ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16590ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16600ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16610ac07820SSatish Balay   IS             istmp;
16620ac07820SSatish Balay 
1663d64ed03dSBarry Smith   PetscFunctionBegin;
16640ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
16650ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
16660ac07820SSatish Balay 
16670ac07820SSatish Balay   /*  first count number of contributors to each processor */
16680ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
16690ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
16700ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
16710ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16720ac07820SSatish Balay     idx = rows[i];
16730ac07820SSatish Balay     found = 0;
16740ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
16750ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
16760ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
16770ac07820SSatish Balay       }
16780ac07820SSatish Balay     }
1679a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
16800ac07820SSatish Balay   }
16810ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
16820ac07820SSatish Balay 
16830ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
16840ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1685ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
16860ac07820SSatish Balay   nrecvs = work[rank];
1687ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
16880ac07820SSatish Balay   nmax   = work[rank];
16890ac07820SSatish Balay   PetscFree(work);
16900ac07820SSatish Balay 
16910ac07820SSatish Balay   /* post receives:   */
1692d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1693d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
16940ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1695ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16960ac07820SSatish Balay   }
16970ac07820SSatish Balay 
16980ac07820SSatish Balay   /* do sends:
16990ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
17000ac07820SSatish Balay      the ith processor
17010ac07820SSatish Balay   */
17020ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1703ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
17040ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
17050ac07820SSatish Balay   starts[0] = 0;
17060ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
17070ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
17080ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
17090ac07820SSatish Balay   }
17100ac07820SSatish Balay   ISRestoreIndices(is,&rows);
17110ac07820SSatish Balay 
17120ac07820SSatish Balay   starts[0] = 0;
17130ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
17140ac07820SSatish Balay   count = 0;
17150ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
17160ac07820SSatish Balay     if (procs[i]) {
1717ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
17180ac07820SSatish Balay     }
17190ac07820SSatish Balay   }
17200ac07820SSatish Balay   PetscFree(starts);
17210ac07820SSatish Balay 
17220ac07820SSatish Balay   base = owners[rank]*bs;
17230ac07820SSatish Balay 
17240ac07820SSatish Balay   /*  wait on receives */
17250ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
17260ac07820SSatish Balay   source = lens + nrecvs;
17270ac07820SSatish Balay   count  = nrecvs; slen = 0;
17280ac07820SSatish Balay   while (count) {
1729ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
17300ac07820SSatish Balay     /* unpack receives into our local space */
1731ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
17320ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
17330ac07820SSatish Balay     lens[imdex]  = n;
17340ac07820SSatish Balay     slen += n;
17350ac07820SSatish Balay     count--;
17360ac07820SSatish Balay   }
17370ac07820SSatish Balay   PetscFree(recv_waits);
17380ac07820SSatish Balay 
17390ac07820SSatish Balay   /* move the data into the send scatter */
17400ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
17410ac07820SSatish Balay   count = 0;
17420ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
17430ac07820SSatish Balay     values = rvalues + i*nmax;
17440ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
17450ac07820SSatish Balay       lrows[count++] = values[j] - base;
17460ac07820SSatish Balay     }
17470ac07820SSatish Balay   }
17480ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
17490ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
17500ac07820SSatish Balay 
17510ac07820SSatish Balay   /* actually zap the local rows */
1752029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
17530ac07820SSatish Balay   PLogObjectParent(A,istmp);
1754a07cd24cSSatish Balay 
175572dacd9aSBarry Smith   /*
175672dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
175772dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
175872dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
175972dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
176072dacd9aSBarry Smith 
176172dacd9aSBarry Smith        Contributed by: Mathew Knepley
176272dacd9aSBarry Smith   */
17639c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
17640ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
17659c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
17669c957beeSSatish Balay     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
17679c957beeSSatish Balay   } else if (diag) {
17689c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1769a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1770a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
1771a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1772a07cd24cSSatish Balay     }
1773a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1774a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
17759c957beeSSatish Balay   } else {
17769c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1777a07cd24cSSatish Balay   }
17789c957beeSSatish Balay 
17799c957beeSSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1780a07cd24cSSatish Balay   PetscFree(lrows);
1781a07cd24cSSatish Balay 
17820ac07820SSatish Balay   /* wait on sends */
17830ac07820SSatish Balay   if (nsends) {
1784d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1785ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
17860ac07820SSatish Balay     PetscFree(send_status);
17870ac07820SSatish Balay   }
17880ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
17890ac07820SSatish Balay 
17903a40ed3dSBarry Smith   PetscFunctionReturn(0);
17910ac07820SSatish Balay }
179272dacd9aSBarry Smith 
1793ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
17945615d1e5SSatish Balay #undef __FUNC__
17955615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1796ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1797ba4ca20aSSatish Balay {
1798ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
179925fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1800133cdb44SSatish Balay   static int  called = 0;
18013a40ed3dSBarry Smith   int         ierr;
1802ba4ca20aSSatish Balay 
1803d64ed03dSBarry Smith   PetscFunctionBegin;
18043a40ed3dSBarry Smith   if (!a->rank) {
18053a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
180625fdafccSSatish Balay   }
180725fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1808133cdb44SSatish Balay   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1809133cdb44SSatish Balay   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
18103a40ed3dSBarry Smith   PetscFunctionReturn(0);
1811ba4ca20aSSatish Balay }
18120ac07820SSatish Balay 
18135615d1e5SSatish Balay #undef __FUNC__
18145615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1815ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1816bb5a7306SBarry Smith {
1817bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1818bb5a7306SBarry Smith   int         ierr;
1819d64ed03dSBarry Smith 
1820d64ed03dSBarry Smith   PetscFunctionBegin;
1821bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
18223a40ed3dSBarry Smith   PetscFunctionReturn(0);
1823bb5a7306SBarry Smith }
1824bb5a7306SBarry Smith 
18252e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
18260ac07820SSatish Balay 
182779bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1828cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1829cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1830cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1831cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1832cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1833cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
1834cc2dc46cSBarry Smith   MatMultTrans_MPIBAIJ,
1835cc2dc46cSBarry Smith   MatMultTransAdd_MPIBAIJ,
1836cc2dc46cSBarry Smith   0,
1837cc2dc46cSBarry Smith   0,
1838cc2dc46cSBarry Smith   0,
1839cc2dc46cSBarry Smith   0,
1840cc2dc46cSBarry Smith   0,
1841cc2dc46cSBarry Smith   0,
1842cc2dc46cSBarry Smith   0,
1843cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1844cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
1845cc2dc46cSBarry Smith   0,
1846cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1847cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1848cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1849cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1850cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1851cc2dc46cSBarry Smith   0,
1852cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1853cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1854cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1855cc2dc46cSBarry Smith   0,
1856cc2dc46cSBarry Smith   0,
1857cc2dc46cSBarry Smith   0,
1858cc2dc46cSBarry Smith   0,
1859cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1860cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1861cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1862cc2dc46cSBarry Smith   0,
1863cc2dc46cSBarry Smith   0,
1864cc2dc46cSBarry Smith   0,
1865cc2dc46cSBarry Smith   0,
18662e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1867cc2dc46cSBarry Smith   0,
1868cc2dc46cSBarry Smith   0,
1869cc2dc46cSBarry Smith   0,
1870cc2dc46cSBarry Smith   0,
1871cc2dc46cSBarry Smith   0,
1872cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1873cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1874cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1875cc2dc46cSBarry Smith   0,
1876cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1877cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1878cc2dc46cSBarry Smith   0,
1879cc2dc46cSBarry Smith   0,
1880cc2dc46cSBarry Smith   0,
1881cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1882cc2dc46cSBarry Smith   0,
1883cc2dc46cSBarry Smith   0,
1884cc2dc46cSBarry Smith   0,
1885cc2dc46cSBarry Smith   0,
1886cc2dc46cSBarry Smith   0,
1887cc2dc46cSBarry Smith   0,
1888cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1889cc2dc46cSBarry Smith   0,
1890cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1891cc2dc46cSBarry Smith   0,
1892cc2dc46cSBarry Smith   0,
1893cc2dc46cSBarry Smith   0,
1894cc2dc46cSBarry Smith   MatGetMaps_Petsc};
189579bdfe76SSatish Balay 
18965ef9f2a5SBarry Smith 
1897e18c124aSSatish Balay EXTERN_C_BEGIN
18985ef9f2a5SBarry Smith #undef __FUNC__
18995ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
19005ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
19015ef9f2a5SBarry Smith {
19025ef9f2a5SBarry Smith   PetscFunctionBegin;
19035ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
19045ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
19055ef9f2a5SBarry Smith   PetscFunctionReturn(0);
19065ef9f2a5SBarry Smith }
1907e18c124aSSatish Balay EXTERN_C_END
190879bdfe76SSatish Balay 
19095615d1e5SSatish Balay #undef __FUNC__
19105615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
191179bdfe76SSatish Balay /*@C
191279bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
191379bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
191479bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
191579bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
191679bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
191779bdfe76SSatish Balay 
1918db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1919db81eaa0SLois Curfman McInnes 
192079bdfe76SSatish Balay    Input Parameters:
1921db81eaa0SLois Curfman McInnes +  comm - MPI communicator
192279bdfe76SSatish Balay .  bs   - size of blockk
192379bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
192492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
192592e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
192692e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
192792e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
192892e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1929be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1930be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
193179bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
193279bdfe76SSatish Balay            submatrix  (same for all local rows)
193392e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
193492e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
1935db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
193692e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
193779bdfe76SSatish Balay            submatrix (same for all local rows).
1938db81eaa0SLois Curfman McInnes -  o_nzz - array containing the number of nonzeros in the various block rows of the
193992e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
194092e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
194179bdfe76SSatish Balay 
194279bdfe76SSatish Balay    Output Parameter:
194379bdfe76SSatish Balay .  A - the matrix
194479bdfe76SSatish Balay 
1945db81eaa0SLois Curfman McInnes    Options Database Keys:
1946db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
1947db81eaa0SLois Curfman McInnes                      block calculations (much slower)
1948db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
1949494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1950494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
19513ffaccefSLois Curfman McInnes 
1952b259b22eSLois Curfman McInnes    Notes:
195379bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
195479bdfe76SSatish Balay    (possibly both).
195579bdfe76SSatish Balay 
1956be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1957be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
1958be79a94dSBarry Smith 
195979bdfe76SSatish Balay    Storage Information:
196079bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
196179bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
196279bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
196379bdfe76SSatish Balay    local matrix (a rectangular submatrix).
196479bdfe76SSatish Balay 
196579bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
196679bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
196779bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
196879bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
196979bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
197079bdfe76SSatish Balay 
197179bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
197279bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
197379bdfe76SSatish Balay 
1974db81eaa0SLois Curfman McInnes .vb
1975db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
1976db81eaa0SLois Curfman McInnes           -------------------
1977db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
1978db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
1979db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
1980db81eaa0SLois Curfman McInnes           -------------------
1981db81eaa0SLois Curfman McInnes .ve
198279bdfe76SSatish Balay 
198379bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
198479bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
198579bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
198657b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
198779bdfe76SSatish Balay 
1988d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1989d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
199079bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
199192e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
199292e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
19936da5968aSLois Curfman McInnes    matrices.
199479bdfe76SSatish Balay 
1995027ccd11SLois Curfman McInnes    Level: intermediate
1996027ccd11SLois Curfman McInnes 
199792e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
199879bdfe76SSatish Balay 
1999db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
200079bdfe76SSatish Balay @*/
200179bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
200279bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
200379bdfe76SSatish Balay {
200479bdfe76SSatish Balay   Mat          B;
200579bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
2006133cdb44SSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
2007a2ab621fSBarry Smith   int          flag1 = 0,flag2 = 0;
200879bdfe76SSatish Balay 
2009d64ed03dSBarry Smith   PetscFunctionBegin;
2010a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
20113914022bSBarry Smith 
20123914022bSBarry Smith   MPI_Comm_size(comm,&size);
2013494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
2014494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
2015494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
20163914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
20173914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
20183914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
20193a40ed3dSBarry Smith     PetscFunctionReturn(0);
20203914022bSBarry Smith   }
20213914022bSBarry Smith 
202279bdfe76SSatish Balay   *A = 0;
20233f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
202479bdfe76SSatish Balay   PLogObjectCreate(B);
202579bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
202679bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
2027cc2dc46cSBarry Smith   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
20284c50302cSBarry Smith 
2029e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
2030e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
203190f02eecSBarry Smith   B->mapping    = 0;
203279bdfe76SSatish Balay   B->factor     = 0;
203379bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
203479bdfe76SSatish Balay 
2035e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
203679bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
203779bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
203879bdfe76SSatish Balay 
2039d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
2040a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2041d64ed03dSBarry Smith   }
2042a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
2043a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2044a8c6a408SBarry Smith   }
2045a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2046a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2047a8c6a408SBarry Smith   }
2048cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2049cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
205079bdfe76SSatish Balay 
205179bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
205279bdfe76SSatish Balay     work[0] = m; work[1] = n;
205379bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2054ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
205579bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
205679bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
205779bdfe76SSatish Balay   }
205879bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
205979bdfe76SSatish Balay     Mbs = M/bs;
2060a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
206179bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
206279bdfe76SSatish Balay     m   = mbs*bs;
206379bdfe76SSatish Balay   }
206479bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
206579bdfe76SSatish Balay     Nbs = N/bs;
2066a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
206779bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
206879bdfe76SSatish Balay     n   = nbs*bs;
206979bdfe76SSatish Balay   }
2070a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2071a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2072a8c6a408SBarry Smith   }
207379bdfe76SSatish Balay 
207479bdfe76SSatish Balay   b->m = m; B->m = m;
207579bdfe76SSatish Balay   b->n = n; B->n = n;
207679bdfe76SSatish Balay   b->N = N; B->N = N;
207779bdfe76SSatish Balay   b->M = M; B->M = M;
207879bdfe76SSatish Balay   b->bs  = bs;
207979bdfe76SSatish Balay   b->bs2 = bs*bs;
208079bdfe76SSatish Balay   b->mbs = mbs;
208179bdfe76SSatish Balay   b->nbs = nbs;
208279bdfe76SSatish Balay   b->Mbs = Mbs;
208379bdfe76SSatish Balay   b->Nbs = Nbs;
208479bdfe76SSatish Balay 
2085c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2086c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2087488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2088488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2089c7fcc2eaSBarry Smith 
209079bdfe76SSatish Balay   /* build local table of row and column ownerships */
209179bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
2092f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
20930ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
2094ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
209579bdfe76SSatish Balay   b->rowners[0] = 0;
209679bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
209779bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
209879bdfe76SSatish Balay   }
209979bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
210079bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
21014fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
21024fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
21034fa0d573SSatish Balay 
2104ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
210579bdfe76SSatish Balay   b->cowners[0] = 0;
210679bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
210779bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
210879bdfe76SSatish Balay   }
210979bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
211079bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
21114fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
21124fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
211379bdfe76SSatish Balay 
2114a07cd24cSSatish Balay 
211579bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2116029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
211779bdfe76SSatish Balay   PLogObjectParent(B,b->A);
211879bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2119029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
212079bdfe76SSatish Balay   PLogObjectParent(B,b->B);
212179bdfe76SSatish Balay 
212279bdfe76SSatish Balay   /* build cache for off array entries formed */
212379bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
212490f02eecSBarry Smith   b->donotstash  = 0;
212579bdfe76SSatish Balay   b->colmap      = 0;
212679bdfe76SSatish Balay   b->garray      = 0;
212779bdfe76SSatish Balay   b->roworiented = 1;
212879bdfe76SSatish Balay 
212930793edcSSatish Balay   /* stuff used in block assembly */
213030793edcSSatish Balay   b->barray       = 0;
213130793edcSSatish Balay 
213279bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
213379bdfe76SSatish Balay   b->lvec         = 0;
213479bdfe76SSatish Balay   b->Mvctx        = 0;
213579bdfe76SSatish Balay 
213679bdfe76SSatish Balay   /* stuff for MatGetRow() */
213779bdfe76SSatish Balay   b->rowindices   = 0;
213879bdfe76SSatish Balay   b->rowvalues    = 0;
213979bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
214079bdfe76SSatish Balay 
2141a07cd24cSSatish Balay   /* hash table stuff */
2142a07cd24cSSatish Balay   b->ht           = 0;
2143187ce0cbSSatish Balay   b->hd           = 0;
21440bdbc534SSatish Balay   b->ht_size      = 0;
2145133cdb44SSatish Balay   b->ht_flag      = 0;
214625fdafccSSatish Balay   b->ht_fact      = 0;
2147187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2148187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2149a07cd24cSSatish Balay 
215079bdfe76SSatish Balay   *A = B;
2151133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2152133cdb44SSatish Balay   if (flg) {
2153133cdb44SSatish Balay     double fact = 1.39;
2154133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2155133cdb44SSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2156133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2157133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2158133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2159133cdb44SSatish Balay   }
21605ef9f2a5SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
21615ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
21625ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
21633a40ed3dSBarry Smith   PetscFunctionReturn(0);
216479bdfe76SSatish Balay }
2165026e39d0SSatish Balay 
21665615d1e5SSatish Balay #undef __FUNC__
21672e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ"
21682e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
21690ac07820SSatish Balay {
21700ac07820SSatish Balay   Mat         mat;
21710ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
21720ac07820SSatish Balay   int         ierr, len=0, flg;
21730ac07820SSatish Balay 
2174d64ed03dSBarry Smith   PetscFunctionBegin;
21750ac07820SSatish Balay   *newmat       = 0;
21763f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
21770ac07820SSatish Balay   PLogObjectCreate(mat);
21780ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2179cc2dc46cSBarry Smith   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2180e1311b90SBarry Smith   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2181e1311b90SBarry Smith   mat->ops->view       = MatView_MPIBAIJ;
21820ac07820SSatish Balay   mat->factor          = matin->factor;
21830ac07820SSatish Balay   mat->assembled       = PETSC_TRUE;
21840ac07820SSatish Balay 
21850ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
21860ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
21870ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
21880ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
21890ac07820SSatish Balay 
21900ac07820SSatish Balay   a->bs  = oldmat->bs;
21910ac07820SSatish Balay   a->bs2 = oldmat->bs2;
21920ac07820SSatish Balay   a->mbs = oldmat->mbs;
21930ac07820SSatish Balay   a->nbs = oldmat->nbs;
21940ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
21950ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
21960ac07820SSatish Balay 
21970ac07820SSatish Balay   a->rstart       = oldmat->rstart;
21980ac07820SSatish Balay   a->rend         = oldmat->rend;
21990ac07820SSatish Balay   a->cstart       = oldmat->cstart;
22000ac07820SSatish Balay   a->cend         = oldmat->cend;
22010ac07820SSatish Balay   a->size         = oldmat->size;
22020ac07820SSatish Balay   a->rank         = oldmat->rank;
2203e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
22040ac07820SSatish Balay   a->rowvalues    = 0;
22050ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
220630793edcSSatish Balay   a->barray       = 0;
22070ac07820SSatish Balay 
2208133cdb44SSatish Balay   /* hash table stuff */
2209133cdb44SSatish Balay   a->ht           = 0;
2210133cdb44SSatish Balay   a->hd           = 0;
2211133cdb44SSatish Balay   a->ht_size      = 0;
2212133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
221325fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2214133cdb44SSatish Balay   a->ht_total_ct  = 0;
2215133cdb44SSatish Balay   a->ht_insert_ct = 0;
2216133cdb44SSatish Balay 
2217133cdb44SSatish Balay 
22180ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2219f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
22200ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
22210ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
22220ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
22230ac07820SSatish Balay   if (oldmat->colmap) {
222448e59246SSatish Balay #if defined (USE_CTABLE)
222548e59246SSatish Balay   ierr = TableCreateCopy( &a->colmap,oldmat->colmap ); CHKERRQ(ierr);
222648e59246SSatish Balay #else
22270ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
22280ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
22290ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
223048e59246SSatish Balay #endif
22310ac07820SSatish Balay   } else a->colmap = 0;
22320ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
22330ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
22340ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
22350ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
22360ac07820SSatish Balay   } else a->garray = 0;
22370ac07820SSatish Balay 
22380ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
22390ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
22400ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2241e18c124aSSatish Balay 
22420ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
22432e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
22440ac07820SSatish Balay   PLogObjectParent(mat,a->A);
22452e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
22460ac07820SSatish Balay   PLogObjectParent(mat,a->B);
22470ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2248e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
22490ac07820SSatish Balay   if (flg) {
22500ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
22510ac07820SSatish Balay   }
22520ac07820SSatish Balay   *newmat = mat;
22533a40ed3dSBarry Smith   PetscFunctionReturn(0);
22540ac07820SSatish Balay }
225557b952d6SSatish Balay 
225657b952d6SSatish Balay #include "sys.h"
225757b952d6SSatish Balay 
22585615d1e5SSatish Balay #undef __FUNC__
22595615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
226057b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
226157b952d6SSatish Balay {
226257b952d6SSatish Balay   Mat          A;
226357b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
226457b952d6SSatish Balay   Scalar       *vals,*buf;
226557b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
226657b952d6SSatish Balay   MPI_Status   status;
2267cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
226857b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
226940011551SBarry Smith   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
227057b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
227157b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
227257b952d6SSatish Balay 
2273d64ed03dSBarry Smith   PetscFunctionBegin;
227457b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
227557b952d6SSatish Balay 
227657b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
227757b952d6SSatish Balay   if (!rank) {
227857b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2279e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2280a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2281d64ed03dSBarry Smith     if (header[3] < 0) {
2282a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2283d64ed03dSBarry Smith     }
22846c5fab8fSBarry Smith   }
2285d64ed03dSBarry Smith 
2286ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
228757b952d6SSatish Balay   M = header[1]; N = header[2];
228857b952d6SSatish Balay 
2289a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
229057b952d6SSatish Balay 
229157b952d6SSatish Balay   /*
229257b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
229357b952d6SSatish Balay      divisible by the blocksize
229457b952d6SSatish Balay   */
229557b952d6SSatish Balay   Mbs        = M/bs;
229657b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
229757b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
229857b952d6SSatish Balay   else                  Mbs++;
229957b952d6SSatish Balay   if (extra_rows &&!rank) {
2300b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
230157b952d6SSatish Balay   }
2302537820f0SBarry Smith 
230357b952d6SSatish Balay   /* determine ownership of all rows */
230457b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
230557b952d6SSatish Balay   m   = mbs * bs;
2306cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2307cee3aa6bSSatish Balay   browners = rowners + size + 1;
2308ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
230957b952d6SSatish Balay   rowners[0] = 0;
2310cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2311cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
231257b952d6SSatish Balay   rstart = rowners[rank];
231357b952d6SSatish Balay   rend   = rowners[rank+1];
231457b952d6SSatish Balay 
231557b952d6SSatish Balay   /* distribute row lengths to all processors */
231657b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
231757b952d6SSatish Balay   if (!rank) {
231857b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2319e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
232057b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
232157b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2322cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2323ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
232457b952d6SSatish Balay     PetscFree(sndcounts);
2325d64ed03dSBarry Smith   } else {
2326ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
232757b952d6SSatish Balay   }
232857b952d6SSatish Balay 
232957b952d6SSatish Balay   if (!rank) {
233057b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
233157b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
233257b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
233357b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
233457b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
233557b952d6SSatish Balay         procsnz[i] += rowlengths[j];
233657b952d6SSatish Balay       }
233757b952d6SSatish Balay     }
233857b952d6SSatish Balay     PetscFree(rowlengths);
233957b952d6SSatish Balay 
234057b952d6SSatish Balay     /* determine max buffer needed and allocate it */
234157b952d6SSatish Balay     maxnz = 0;
234257b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
234357b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
234457b952d6SSatish Balay     }
234557b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
234657b952d6SSatish Balay 
234757b952d6SSatish Balay     /* read in my part of the matrix column indices  */
234857b952d6SSatish Balay     nz = procsnz[0];
234957b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
235057b952d6SSatish Balay     mycols = ibuf;
2351cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2352e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2353cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2354cee3aa6bSSatish Balay 
235557b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
235657b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
235757b952d6SSatish Balay       nz   = procsnz[i];
2358e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2359ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
236057b952d6SSatish Balay     }
236157b952d6SSatish Balay     /* read in the stuff for the last proc */
236257b952d6SSatish Balay     if ( size != 1 ) {
236357b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2364e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
236557b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2366ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
236757b952d6SSatish Balay     }
236857b952d6SSatish Balay     PetscFree(cols);
2369d64ed03dSBarry Smith   } else {
237057b952d6SSatish Balay     /* determine buffer space needed for message */
237157b952d6SSatish Balay     nz = 0;
237257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
237357b952d6SSatish Balay       nz += locrowlens[i];
237457b952d6SSatish Balay     }
237557b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
237657b952d6SSatish Balay     mycols = ibuf;
237757b952d6SSatish Balay     /* receive message of column indices*/
2378ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2379ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2380a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
238157b952d6SSatish Balay   }
238257b952d6SSatish Balay 
238357b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2384cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2385cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
238657b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2387cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
238857b952d6SSatish Balay   masked1 = mask    + Mbs;
238957b952d6SSatish Balay   masked2 = masked1 + Mbs;
239057b952d6SSatish Balay   rowcount = 0; nzcount = 0;
239157b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
239257b952d6SSatish Balay     dcount  = 0;
239357b952d6SSatish Balay     odcount = 0;
239457b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
239557b952d6SSatish Balay       kmax = locrowlens[rowcount];
239657b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
239757b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
239857b952d6SSatish Balay         if (!mask[tmp]) {
239957b952d6SSatish Balay           mask[tmp] = 1;
240057b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
240157b952d6SSatish Balay           else masked1[dcount++] = tmp;
240257b952d6SSatish Balay         }
240357b952d6SSatish Balay       }
240457b952d6SSatish Balay       rowcount++;
240557b952d6SSatish Balay     }
2406cee3aa6bSSatish Balay 
240757b952d6SSatish Balay     dlens[i]  = dcount;
240857b952d6SSatish Balay     odlens[i] = odcount;
2409cee3aa6bSSatish Balay 
241057b952d6SSatish Balay     /* zero out the mask elements we set */
241157b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
241257b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
241357b952d6SSatish Balay   }
2414cee3aa6bSSatish Balay 
241557b952d6SSatish Balay   /* create our matrix */
2416537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2417537820f0SBarry Smith          CHKERRQ(ierr);
241857b952d6SSatish Balay   A = *newmat;
24196d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
242057b952d6SSatish Balay 
242157b952d6SSatish Balay   if (!rank) {
242257b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
242357b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
242457b952d6SSatish Balay     nz = procsnz[0];
242557b952d6SSatish Balay     vals = buf;
2426cee3aa6bSSatish Balay     mycols = ibuf;
2427cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2428e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2429cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2430537820f0SBarry Smith 
243157b952d6SSatish Balay     /* insert into matrix */
243257b952d6SSatish Balay     jj      = rstart*bs;
243357b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
243457b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
243557b952d6SSatish Balay       mycols += locrowlens[i];
243657b952d6SSatish Balay       vals   += locrowlens[i];
243757b952d6SSatish Balay       jj++;
243857b952d6SSatish Balay     }
243957b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
244057b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
244157b952d6SSatish Balay       nz   = procsnz[i];
244257b952d6SSatish Balay       vals = buf;
2443e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2444ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
244557b952d6SSatish Balay     }
244657b952d6SSatish Balay     /* the last proc */
244757b952d6SSatish Balay     if ( size != 1 ){
244857b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2449cee3aa6bSSatish Balay       vals = buf;
2450e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
245157b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2452ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
245357b952d6SSatish Balay     }
245457b952d6SSatish Balay     PetscFree(procsnz);
2455d64ed03dSBarry Smith   } else {
245657b952d6SSatish Balay     /* receive numeric values */
245757b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
245857b952d6SSatish Balay 
245957b952d6SSatish Balay     /* receive message of values*/
246057b952d6SSatish Balay     vals   = buf;
2461cee3aa6bSSatish Balay     mycols = ibuf;
2462ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2463ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2464a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
246557b952d6SSatish Balay 
246657b952d6SSatish Balay     /* insert into matrix */
246757b952d6SSatish Balay     jj      = rstart*bs;
2468cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
246957b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
247057b952d6SSatish Balay       mycols += locrowlens[i];
247157b952d6SSatish Balay       vals   += locrowlens[i];
247257b952d6SSatish Balay       jj++;
247357b952d6SSatish Balay     }
247457b952d6SSatish Balay   }
247557b952d6SSatish Balay   PetscFree(locrowlens);
247657b952d6SSatish Balay   PetscFree(buf);
247757b952d6SSatish Balay   PetscFree(ibuf);
247857b952d6SSatish Balay   PetscFree(rowners);
247957b952d6SSatish Balay   PetscFree(dlens);
2480cee3aa6bSSatish Balay   PetscFree(mask);
24816d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
24826d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
24833a40ed3dSBarry Smith   PetscFunctionReturn(0);
248457b952d6SSatish Balay }
248557b952d6SSatish Balay 
248657b952d6SSatish Balay 
2487133cdb44SSatish Balay 
2488133cdb44SSatish Balay #undef __FUNC__
2489133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2490133cdb44SSatish Balay /*@
2491133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2492133cdb44SSatish Balay 
2493133cdb44SSatish Balay    Input Parameters:
2494133cdb44SSatish Balay .  mat  - the matrix
2495133cdb44SSatish Balay .  fact - factor
2496133cdb44SSatish Balay 
2497fee21e36SBarry Smith    Collective on Mat
2498fee21e36SBarry Smith 
2499133cdb44SSatish Balay   Notes:
2500133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2501133cdb44SSatish Balay 
2502133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2503133cdb44SSatish Balay 
2504133cdb44SSatish Balay .seealso: MatSetOption()
2505133cdb44SSatish Balay @*/
2506133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2507133cdb44SSatish Balay {
250825fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2509133cdb44SSatish Balay 
2510133cdb44SSatish Balay   PetscFunctionBegin;
2511133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
251225fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2513133cdb44SSatish Balay       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2514133cdb44SSatish Balay   }
2515133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*) mat->data;
2516133cdb44SSatish Balay   baij->ht_fact = fact;
2517133cdb44SSatish Balay   PetscFunctionReturn(0);
2518133cdb44SSatish Balay }
2519