xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e18c124a844f8a09db1af4148adb1e1cdba562d7)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*e18c124aSSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.139 1998/12/17 22:10:47 bsmith Exp bsmith $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
11d212a18eSSatish Balay extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatGetSubMatrixCall,Mat **);
12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
133b2fbd54SBarry Smith 
14537820f0SBarry Smith /*
15537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
1657b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
1757b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
1857b952d6SSatish Balay    length of colmap equals the global matrix length.
1957b952d6SSatish Balay */
205615d1e5SSatish Balay #undef __FUNC__
215615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2257b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2357b952d6SSatish Balay {
2457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
2557b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
26928fc39bSSatish Balay   int         nbs = B->nbs,i,bs=B->bs;;
2757b952d6SSatish Balay 
28d64ed03dSBarry Smith   PetscFunctionBegin;
29758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
3057b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
3157b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
32928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
333a40ed3dSBarry Smith   PetscFunctionReturn(0);
3457b952d6SSatish Balay }
3557b952d6SSatish Balay 
3680c1aa95SSatish Balay #define CHUNKSIZE  10
3780c1aa95SSatish Balay 
38f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
3980c1aa95SSatish Balay { \
4080c1aa95SSatish Balay  \
4180c1aa95SSatish Balay     brow = row/bs;  \
4280c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
43ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
4480c1aa95SSatish Balay       bcol = col/bs; \
4580c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
46ab26458aSBarry Smith       low = 0; high = nrow; \
47ab26458aSBarry Smith       while (high-low > 3) { \
48ab26458aSBarry Smith         t = (low+high)/2; \
49ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
50ab26458aSBarry Smith         else              low  = t; \
51ab26458aSBarry Smith       } \
52ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
5380c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
5480c1aa95SSatish Balay         if (rp[_i] == bcol) { \
5580c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
56eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
57eada6651SSatish Balay           else                    *bap  = value;  \
58ac7a638eSSatish Balay           goto a_noinsert; \
5980c1aa95SSatish Balay         } \
6080c1aa95SSatish Balay       } \
6189280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
62a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
6380c1aa95SSatish Balay       if (nrow >= rmax) { \
6480c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
6580c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
6680c1aa95SSatish Balay         Scalar *new_a; \
6780c1aa95SSatish Balay  \
68a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
6989280ab3SLois Curfman McInnes  \
7080c1aa95SSatish Balay         /* malloc new storage space */ \
7180c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
7280c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
7380c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
7480c1aa95SSatish Balay         new_i   = new_j + new_nz; \
7580c1aa95SSatish Balay  \
7680c1aa95SSatish Balay         /* copy over old data into new slots */ \
7780c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
7880c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
7980c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
8080c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
8180c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
8280c1aa95SSatish Balay                                                            len*sizeof(int)); \
8380c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
8480c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
8580c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
8680c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
8780c1aa95SSatish Balay         /* free up old matrix storage */ \
8880c1aa95SSatish Balay         PetscFree(a->a);  \
8980c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
9080c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
9180c1aa95SSatish Balay         a->singlemalloc = 1; \
9280c1aa95SSatish Balay  \
9380c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
94ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
9580c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
9680c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
9780c1aa95SSatish Balay         a->reallocs++; \
9880c1aa95SSatish Balay         a->nz++; \
9980c1aa95SSatish Balay       } \
10080c1aa95SSatish Balay       N = nrow++ - 1;  \
10180c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
10280c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
10380c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
10480c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
10580c1aa95SSatish Balay       } \
10680c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
10780c1aa95SSatish Balay       rp[_i]                      = bcol;  \
10880c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
109ac7a638eSSatish Balay       a_noinsert:; \
11080c1aa95SSatish Balay     ailen[brow] = nrow; \
11180c1aa95SSatish Balay }
11257b952d6SSatish Balay 
113ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
114ac7a638eSSatish Balay { \
115ac7a638eSSatish Balay  \
116ac7a638eSSatish Balay     brow = row/bs;  \
117ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
118ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
119ac7a638eSSatish Balay       bcol = col/bs; \
120ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
121ac7a638eSSatish Balay       low = 0; high = nrow; \
122ac7a638eSSatish Balay       while (high-low > 3) { \
123ac7a638eSSatish Balay         t = (low+high)/2; \
124ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
125ac7a638eSSatish Balay         else              low  = t; \
126ac7a638eSSatish Balay       } \
127ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
128ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
129ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
130ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
131ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
132ac7a638eSSatish Balay           else                    *bap  = value;  \
133ac7a638eSSatish Balay           goto b_noinsert; \
134ac7a638eSSatish Balay         } \
135ac7a638eSSatish Balay       } \
13689280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
137a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
138ac7a638eSSatish Balay       if (nrow >= rmax) { \
139ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
14074c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
141ac7a638eSSatish Balay         Scalar *new_a; \
142ac7a638eSSatish Balay  \
143a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
14489280ab3SLois Curfman McInnes  \
145ac7a638eSSatish Balay         /* malloc new storage space */ \
14674c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
147ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
148ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
149ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
150ac7a638eSSatish Balay  \
151ac7a638eSSatish Balay         /* copy over old data into new slots */ \
152ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
15374c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
154ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
155ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
156ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
157ac7a638eSSatish Balay                                                            len*sizeof(int)); \
158ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
159ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
160ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
161ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
162ac7a638eSSatish Balay         /* free up old matrix storage */ \
16374c639caSSatish Balay         PetscFree(b->a);  \
16474c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
16574c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
16674c639caSSatish Balay         b->singlemalloc = 1; \
167ac7a638eSSatish Balay  \
168ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
169ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
17074c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
17174c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
17274c639caSSatish Balay         b->reallocs++; \
17374c639caSSatish Balay         b->nz++; \
174ac7a638eSSatish Balay       } \
175ac7a638eSSatish Balay       N = nrow++ - 1;  \
176ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
177ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
178ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
179ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
180ac7a638eSSatish Balay       } \
181ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
182ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
183ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
184ac7a638eSSatish Balay       b_noinsert:; \
185ac7a638eSSatish Balay     bilen[brow] = nrow; \
186ac7a638eSSatish Balay }
187ac7a638eSSatish Balay 
1885615d1e5SSatish Balay #undef __FUNC__
1895615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
190ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
19157b952d6SSatish Balay {
19257b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
19357b952d6SSatish Balay   Scalar      value;
1944fa0d573SSatish Balay   int         ierr,i,j,row,col;
1954fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
1964fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
1974fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
19857b952d6SSatish Balay 
199eada6651SSatish Balay   /* Some Variables required in the macro */
20080c1aa95SSatish Balay   Mat         A = baij->A;
20180c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
202ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203ac7a638eSSatish Balay   Scalar      *aa=a->a;
204ac7a638eSSatish Balay 
205ac7a638eSSatish Balay   Mat         B = baij->B;
206ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
207ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208ac7a638eSSatish Balay   Scalar      *ba=b->a;
209ac7a638eSSatish Balay 
210ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
211ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
212ac7a638eSSatish Balay   Scalar      *ap,*bap;
21380c1aa95SSatish Balay 
214d64ed03dSBarry Smith   PetscFunctionBegin;
21557b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2163a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
217a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
218a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
219639f9d9dSBarry Smith #endif
22057b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
22157b952d6SSatish Balay       row = im[i] - rstart_orig;
22257b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
22357b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
22457b952d6SSatish Balay           col = in[j] - cstart_orig;
22557b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
22780c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
22857b952d6SSatish Balay         }
2293a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
230a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
231a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
232639f9d9dSBarry Smith #endif
23357b952d6SSatish Balay         else {
23457b952d6SSatish Balay           if (mat->was_assembled) {
235905e6a2fSBarry Smith             if (!baij->colmap) {
236905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237905e6a2fSBarry Smith             }
238905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
23957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
24057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
24157b952d6SSatish Balay               col =  in[j];
2429bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2439bf004c3SSatish Balay               B = baij->B;
2449bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2459bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2469bf004c3SSatish Balay               ba=b->a;
24757b952d6SSatish Balay             }
248d64ed03dSBarry Smith           } else col = in[j];
24957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
250ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
251ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
25257b952d6SSatish Balay         }
25357b952d6SSatish Balay       }
254d64ed03dSBarry Smith     } else {
25590f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
25657b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
257d64ed03dSBarry Smith       } else {
25890f02eecSBarry Smith         if (!baij->donotstash) {
25957b952d6SSatish Balay           row = im[i];
26057b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
26157b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
26257b952d6SSatish Balay           }
26357b952d6SSatish Balay         }
26457b952d6SSatish Balay       }
26557b952d6SSatish Balay     }
26690f02eecSBarry Smith   }
2673a40ed3dSBarry Smith   PetscFunctionReturn(0);
26857b952d6SSatish Balay }
26957b952d6SSatish Balay 
270ab26458aSBarry Smith extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
271ab26458aSBarry Smith #undef __FUNC__
272ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
273ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
274ab26458aSBarry Smith {
275ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
27630793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
277abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
278ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
279ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
280ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
281ab26458aSBarry Smith 
28230793edcSSatish Balay   if(!barray) {
28347513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
28430793edcSSatish Balay   }
28530793edcSSatish Balay 
286ab26458aSBarry Smith   if (roworiented) {
287ab26458aSBarry Smith     stepval = (n-1)*bs;
288ab26458aSBarry Smith   } else {
289ab26458aSBarry Smith     stepval = (m-1)*bs;
290ab26458aSBarry Smith   }
291ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
2923a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
293a8c6a408SBarry Smith     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
294a8c6a408SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
295ab26458aSBarry Smith #endif
296ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
297ab26458aSBarry Smith       row = im[i] - rstart;
298ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
29915b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
30015b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
30115b57d14SSatish Balay           barray = v + i*bs2;
30215b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
30315b57d14SSatish Balay           barray = v + j*bs2;
30415b57d14SSatish Balay         } else { /* Here a copy is required */
305ab26458aSBarry Smith           if (roworiented) {
306ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
307ab26458aSBarry Smith           } else {
308ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
309abef11f7SSatish Balay           }
31047513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
31147513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
31230793edcSSatish Balay               *barray++  = *value++;
31347513183SBarry Smith             }
31447513183SBarry Smith           }
31530793edcSSatish Balay           barray -=bs2;
31615b57d14SSatish Balay         }
317abef11f7SSatish Balay 
318abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
319abef11f7SSatish Balay           col  = in[j] - cstart;
32030793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
321ab26458aSBarry Smith         }
3223a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
323a8c6a408SBarry Smith         else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
324a8c6a408SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
325ab26458aSBarry Smith #endif
326ab26458aSBarry Smith         else {
327ab26458aSBarry Smith           if (mat->was_assembled) {
328ab26458aSBarry Smith             if (!baij->colmap) {
329ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
330ab26458aSBarry Smith             }
331a5eb4965SSatish Balay 
3323a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
333a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
334a5eb4965SSatish Balay #endif
335a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
336ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
337ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
338ab26458aSBarry Smith               col =  in[j];
339ab26458aSBarry Smith             }
340ab26458aSBarry Smith           }
341ab26458aSBarry Smith           else col = in[j];
34230793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
343ab26458aSBarry Smith         }
344ab26458aSBarry Smith       }
345d64ed03dSBarry Smith     } else {
346ab26458aSBarry Smith       if (!baij->donotstash) {
347ab26458aSBarry Smith         if (roworiented ) {
348abef11f7SSatish Balay           row   = im[i]*bs;
349abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
350abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
351abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
352abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
353abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
354abef11f7SSatish Balay               }
355ab26458aSBarry Smith             }
356ab26458aSBarry Smith           }
357d64ed03dSBarry Smith         } else {
358ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
359abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
360abef11f7SSatish Balay             col   = in[j]*bs;
361abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
362abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
363abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
364ab26458aSBarry Smith               }
365ab26458aSBarry Smith             }
366ab26458aSBarry Smith           }
367abef11f7SSatish Balay         }
368abef11f7SSatish Balay       }
369ab26458aSBarry Smith     }
370ab26458aSBarry Smith   }
3713a40ed3dSBarry Smith   PetscFunctionReturn(0);
372ab26458aSBarry Smith }
3730bdbc534SSatish Balay #define HASH_KEY 0.6180339887
374c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
375c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
376c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
3775615d1e5SSatish Balay #undef __FUNC__
3780bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
3790bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
3800bdbc534SSatish Balay {
3810bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3820bdbc534SSatish Balay   int         ierr,i,j,row,col;
3830bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
384c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
385c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
386c2760754SSatish Balay   double      tmp;
387b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,value;
3884a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
3894a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
3904a15367fSSatish Balay #endif
3910bdbc534SSatish Balay 
3920bdbc534SSatish Balay   PetscFunctionBegin;
3930bdbc534SSatish Balay 
3940bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
3950bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
3960bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
3970bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
3980bdbc534SSatish Balay #endif
3990bdbc534SSatish Balay       row = im[i];
400c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4010bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4020bdbc534SSatish Balay         col = in[j];
4030bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4040bdbc534SSatish Balay         /* Look up into the Hash Table */
405c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
406c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4070bdbc534SSatish Balay 
408c2760754SSatish Balay 
409c2760754SSatish Balay         idx = h1;
410187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
411187ce0cbSSatish Balay         insert_ct++;
412187ce0cbSSatish Balay         total_ct++;
413187ce0cbSSatish Balay         if (HT[idx] != key) {
414187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
415187ce0cbSSatish Balay           if (idx == size) {
416187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
417187ce0cbSSatish Balay             if (idx == h1) {
418187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
419187ce0cbSSatish Balay             }
420187ce0cbSSatish Balay           }
421187ce0cbSSatish Balay         }
422187ce0cbSSatish Balay #else
423c2760754SSatish Balay         if (HT[idx] != key) {
424c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
425c2760754SSatish Balay           if (idx == size) {
426c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
427c2760754SSatish Balay             if (idx == h1) {
428c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
429c2760754SSatish Balay             }
430c2760754SSatish Balay           }
431c2760754SSatish Balay         }
432187ce0cbSSatish Balay #endif
433c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
434c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
435c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4360bdbc534SSatish Balay       }
4370bdbc534SSatish Balay     } else {
4380bdbc534SSatish Balay       if (roworiented && !baij->donotstash) {
4390bdbc534SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
4400bdbc534SSatish Balay       } else {
4410bdbc534SSatish Balay         if (!baij->donotstash) {
4420bdbc534SSatish Balay           row = im[i];
4430bdbc534SSatish Balay 	  for ( j=0; j<n; j++ ) {
4440bdbc534SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
4450bdbc534SSatish Balay           }
4460bdbc534SSatish Balay         }
4470bdbc534SSatish Balay       }
4480bdbc534SSatish Balay     }
4490bdbc534SSatish Balay   }
450187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
451187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
452187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
453187ce0cbSSatish Balay #endif
4540bdbc534SSatish Balay   PetscFunctionReturn(0);
4550bdbc534SSatish Balay }
4560bdbc534SSatish Balay 
4570bdbc534SSatish Balay #undef __FUNC__
4580bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4590bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4600bdbc534SSatish Balay {
4610bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4620bdbc534SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
4630bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
464b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
465c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
466c2760754SSatish Balay   double      tmp;
467187ce0cbSSatish Balay   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
4684a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
4694a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4704a15367fSSatish Balay #endif
4710bdbc534SSatish Balay 
472d0a41580SSatish Balay   PetscFunctionBegin;
473d0a41580SSatish Balay 
4740bdbc534SSatish Balay   if (roworiented) {
4750bdbc534SSatish Balay     stepval = (n-1)*bs;
4760bdbc534SSatish Balay   } else {
4770bdbc534SSatish Balay     stepval = (m-1)*bs;
4780bdbc534SSatish Balay   }
4790bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
4800bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
4810bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4820bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4830bdbc534SSatish Balay #endif
4840bdbc534SSatish Balay     row   = im[i];
485187ce0cbSSatish Balay     v_t   = v + i*bs2;
486c2760754SSatish Balay     if (row >= rstart && row < rend) {
4870bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4880bdbc534SSatish Balay         col = in[j];
4890bdbc534SSatish Balay 
4900bdbc534SSatish Balay         /* Look up into the Hash Table */
491c2760754SSatish Balay         key = row*Nbs+col+1;
492c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4930bdbc534SSatish Balay 
494c2760754SSatish Balay         idx = h1;
495187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
496187ce0cbSSatish Balay         total_ct++;
497187ce0cbSSatish Balay         insert_ct++;
498187ce0cbSSatish Balay        if (HT[idx] != key) {
499187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
500187ce0cbSSatish Balay           if (idx == size) {
501187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
502187ce0cbSSatish Balay             if (idx == h1) {
503187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
504187ce0cbSSatish Balay             }
505187ce0cbSSatish Balay           }
506187ce0cbSSatish Balay         }
507187ce0cbSSatish Balay #else
508c2760754SSatish Balay         if (HT[idx] != key) {
509c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
510c2760754SSatish Balay           if (idx == size) {
511c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
512c2760754SSatish Balay             if (idx == h1) {
513c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
514c2760754SSatish Balay             }
515c2760754SSatish Balay           }
516c2760754SSatish Balay         }
517187ce0cbSSatish Balay #endif
518c2760754SSatish Balay         baij_a = HD[idx];
5190bdbc534SSatish Balay         if (roworiented) {
520c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
521187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
522187ce0cbSSatish Balay           value = v_t;
523187ce0cbSSatish Balay           v_t  += bs;
524fef45726SSatish Balay           if (addv == ADD_VALUES) {
525c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
526c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
527fef45726SSatish Balay                 baij_a[jj]  += *value++;
528b4cc0f5aSSatish Balay               }
529b4cc0f5aSSatish Balay             }
530fef45726SSatish Balay           } else {
531c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
532c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
533fef45726SSatish Balay                 baij_a[jj]  = *value++;
534fef45726SSatish Balay               }
535fef45726SSatish Balay             }
536fef45726SSatish Balay           }
5370bdbc534SSatish Balay         } else {
5380bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
539fef45726SSatish Balay           if (addv == ADD_VALUES) {
540b4cc0f5aSSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
5410bdbc534SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
542fef45726SSatish Balay                 baij_a[jj]  += *value++;
543fef45726SSatish Balay               }
544fef45726SSatish Balay             }
545fef45726SSatish Balay           } else {
546fef45726SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
547fef45726SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
548fef45726SSatish Balay                 baij_a[jj]  = *value++;
549fef45726SSatish Balay               }
550b4cc0f5aSSatish Balay             }
5510bdbc534SSatish Balay           }
5520bdbc534SSatish Balay         }
5530bdbc534SSatish Balay       }
5540bdbc534SSatish Balay     } else {
5550bdbc534SSatish Balay       if (!baij->donotstash) {
5560bdbc534SSatish Balay         if (roworiented ) {
5570bdbc534SSatish Balay           row   = im[i]*bs;
5580bdbc534SSatish Balay           value = v + i*(stepval+bs)*bs;
5590bdbc534SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
5600bdbc534SSatish Balay             for ( k=0; k<n; k++ ) {
5610bdbc534SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
5620bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5630bdbc534SSatish Balay               }
5640bdbc534SSatish Balay             }
5650bdbc534SSatish Balay           }
5660bdbc534SSatish Balay         } else {
5670bdbc534SSatish Balay           for ( j=0; j<n; j++ ) {
5680bdbc534SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
5690bdbc534SSatish Balay             col   = in[j]*bs;
5700bdbc534SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
5710bdbc534SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
5720bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5730bdbc534SSatish Balay               }
5740bdbc534SSatish Balay             }
5750bdbc534SSatish Balay           }
5760bdbc534SSatish Balay         }
5770bdbc534SSatish Balay       }
5780bdbc534SSatish Balay     }
5790bdbc534SSatish Balay   }
580187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
581187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
582187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
583187ce0cbSSatish Balay #endif
5840bdbc534SSatish Balay   PetscFunctionReturn(0);
5850bdbc534SSatish Balay }
586133cdb44SSatish Balay 
5870bdbc534SSatish Balay #undef __FUNC__
5885615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
589ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
590d6de1c52SSatish Balay {
591d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
592d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
593d6de1c52SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col;
594d6de1c52SSatish Balay 
595133cdb44SSatish Balay   PetscFunctionBegin;
596d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
597a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
598a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
599d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
600d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
601d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
602a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
603a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
604d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
605d6de1c52SSatish Balay           col = idxn[j] - bscstart;
60698dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
607d64ed03dSBarry Smith         } else {
608905e6a2fSBarry Smith           if (!baij->colmap) {
609905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
610905e6a2fSBarry Smith           }
611e60e1c95SSatish Balay           if((baij->colmap[idxn[j]/bs]-1 < 0) ||
612dcb20de4SSatish Balay              (baij->garray[(baij->colmap[idxn[j]/bs]-1)/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
613d9d09a02SSatish Balay           else {
614dcb20de4SSatish Balay             col  = (baij->colmap[idxn[j]/bs]-1) + idxn[j]%bs;
61598dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
616d6de1c52SSatish Balay           }
617d6de1c52SSatish Balay         }
618d6de1c52SSatish Balay       }
619d64ed03dSBarry Smith     } else {
620a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
621d6de1c52SSatish Balay     }
622d6de1c52SSatish Balay   }
6233a40ed3dSBarry Smith  PetscFunctionReturn(0);
624d6de1c52SSatish Balay }
625d6de1c52SSatish Balay 
6265615d1e5SSatish Balay #undef __FUNC__
6275615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
628ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
629d6de1c52SSatish Balay {
630d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
631d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
632acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
633d6de1c52SSatish Balay   double     sum = 0.0;
634d6de1c52SSatish Balay   Scalar     *v;
635d6de1c52SSatish Balay 
636d64ed03dSBarry Smith   PetscFunctionBegin;
637d6de1c52SSatish Balay   if (baij->size == 1) {
638d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
639d6de1c52SSatish Balay   } else {
640d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
641d6de1c52SSatish Balay       v = amat->a;
642d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
6433a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
644e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
645d6de1c52SSatish Balay #else
646d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
647d6de1c52SSatish Balay #endif
648d6de1c52SSatish Balay       }
649d6de1c52SSatish Balay       v = bmat->a;
650d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
6513a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
652e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
653d6de1c52SSatish Balay #else
654d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
655d6de1c52SSatish Balay #endif
656d6de1c52SSatish Balay       }
657ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
658d6de1c52SSatish Balay       *norm = sqrt(*norm);
659d64ed03dSBarry Smith     } else {
660e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
661d6de1c52SSatish Balay     }
662d64ed03dSBarry Smith   }
6633a40ed3dSBarry Smith   PetscFunctionReturn(0);
664d6de1c52SSatish Balay }
66557b952d6SSatish Balay 
6665615d1e5SSatish Balay #undef __FUNC__
6675615d1e5SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
668ec1ea8d8SLois Curfman McInnes int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
66957b952d6SSatish Balay {
67057b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
67157b952d6SSatish Balay   MPI_Comm    comm = mat->comm;
67257b952d6SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
67357b952d6SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
67457b952d6SSatish Balay   MPI_Request *send_waits,*recv_waits;
67557b952d6SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
67657b952d6SSatish Balay   InsertMode  addv;
67757b952d6SSatish Balay   Scalar      *rvalues,*svalues;
67857b952d6SSatish Balay 
679d64ed03dSBarry Smith   PetscFunctionBegin;
680570da906SBarry Smith   if (baij->donotstash) {
681570da906SBarry Smith     baij->svalues    = 0; baij->rvalues    = 0;
682570da906SBarry Smith     baij->nsends     = 0; baij->nrecvs     = 0;
683570da906SBarry Smith     baij->send_waits = 0; baij->recv_waits = 0;
684570da906SBarry Smith     baij->rmax       = 0;
685570da906SBarry Smith     PetscFunctionReturn(0);
686570da906SBarry Smith   }
687570da906SBarry Smith 
68857b952d6SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
689ca161407SBarry Smith   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
69057b952d6SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
691a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
69257b952d6SSatish Balay   }
693e0fa3b82SLois Curfman McInnes   mat->insertmode = addv; /* in case this processor had no cache */
69457b952d6SSatish Balay 
69557b952d6SSatish Balay   /*  first count number of contributors to each processor */
69657b952d6SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
69757b952d6SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
69857b952d6SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
69957b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
70057b952d6SSatish Balay     idx = baij->stash.idx[i];
70157b952d6SSatish Balay     for ( j=0; j<size; j++ ) {
70257b952d6SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
70357b952d6SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
70457b952d6SSatish Balay       }
70557b952d6SSatish Balay     }
70657b952d6SSatish Balay   }
70757b952d6SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
70857b952d6SSatish Balay 
70957b952d6SSatish Balay   /* inform other processors of number of messages and max length*/
71057b952d6SSatish Balay   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
711ca161407SBarry Smith   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
71257b952d6SSatish Balay   nreceives = work[rank];
713ca161407SBarry Smith   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
71457b952d6SSatish Balay   nmax      = work[rank];
71557b952d6SSatish Balay   PetscFree(work);
71657b952d6SSatish Balay 
71757b952d6SSatish Balay   /* post receives:
71857b952d6SSatish Balay        1) each message will consist of ordered pairs
71957b952d6SSatish Balay      (global index,value) we store the global index as a double
72057b952d6SSatish Balay      to simplify the message passing.
72157b952d6SSatish Balay        2) since we don't know how long each individual message is we
72257b952d6SSatish Balay      allocate the largest needed buffer for each receive. Potentially
72357b952d6SSatish Balay      this is a lot of wasted space.
72457b952d6SSatish Balay 
72557b952d6SSatish Balay 
72657b952d6SSatish Balay        This could be done better.
72757b952d6SSatish Balay   */
728f8abc2e8SBarry Smith   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
729f8abc2e8SBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
73057b952d6SSatish Balay   for ( i=0; i<nreceives; i++ ) {
731ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
73257b952d6SSatish Balay   }
73357b952d6SSatish Balay 
73457b952d6SSatish Balay   /* do sends:
73557b952d6SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
73657b952d6SSatish Balay          the ith processor
73757b952d6SSatish Balay   */
73857b952d6SSatish Balay   svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
739d64ed03dSBarry Smith   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
74057b952d6SSatish Balay   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
74157b952d6SSatish Balay   starts[0] = 0;
74257b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
74357b952d6SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
74457b952d6SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
74557b952d6SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
74657b952d6SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
74757b952d6SSatish Balay   }
74857b952d6SSatish Balay   PetscFree(owner);
74957b952d6SSatish Balay   starts[0] = 0;
75057b952d6SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
75157b952d6SSatish Balay   count = 0;
75257b952d6SSatish Balay   for ( i=0; i<size; i++ ) {
75357b952d6SSatish Balay     if (procs[i]) {
754ca161407SBarry Smith       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
75557b952d6SSatish Balay     }
75657b952d6SSatish Balay   }
75757b952d6SSatish Balay   PetscFree(starts); PetscFree(nprocs);
75857b952d6SSatish Balay 
75957b952d6SSatish Balay   /* Free cache space */
76010a665d1SBarry Smith   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
76157b952d6SSatish Balay   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
76257b952d6SSatish Balay 
76357b952d6SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
76457b952d6SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
76557b952d6SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
76657b952d6SSatish Balay   baij->rmax       = nmax;
76757b952d6SSatish Balay 
7683a40ed3dSBarry Smith   PetscFunctionReturn(0);
76957b952d6SSatish Balay }
770bd7f49f5SSatish Balay 
771fef45726SSatish Balay /*
772fef45726SSatish Balay   Creates the hash table, and sets the table
773fef45726SSatish Balay   This table is created only once.
774fef45726SSatish Balay   If new entried need to be added to the matrix
775fef45726SSatish Balay   then the hash table has to be destroyed and
776fef45726SSatish Balay   recreated.
777fef45726SSatish Balay */
778fef45726SSatish Balay #undef __FUNC__
779fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
780d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
781596b8d2eSBarry Smith {
782596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
783596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
784596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
7850bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7864a15367fSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart;
787187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
788fef45726SSatish Balay   int         *HT,key;
7890bdbc534SSatish Balay   Scalar      **HD;
790c2760754SSatish Balay   double      tmp;
7914a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
7924a15367fSSatish Balay   int         ct=0,max=0;
7934a15367fSSatish Balay #endif
794fef45726SSatish Balay 
795d64ed03dSBarry Smith   PetscFunctionBegin;
7960bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7970bdbc534SSatish Balay   size = baij->ht_size;
798fef45726SSatish Balay 
7990bdbc534SSatish Balay   if (baij->ht) {
8000bdbc534SSatish Balay     PetscFunctionReturn(0);
801596b8d2eSBarry Smith   }
8020bdbc534SSatish Balay 
803fef45726SSatish Balay   /* Allocate Memory for Hash Table */
804b9e4cc15SSatish Balay   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
805b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
806b9e4cc15SSatish Balay   HD = baij->hd;
807a07cd24cSSatish Balay   HT = baij->ht;
808b9e4cc15SSatish Balay 
809b9e4cc15SSatish Balay 
810c2760754SSatish Balay   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
8110bdbc534SSatish Balay 
812596b8d2eSBarry Smith 
813596b8d2eSBarry Smith   /* Loop Over A */
8140bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
815596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
8160bdbc534SSatish Balay       row = i+rstart;
8170bdbc534SSatish Balay       col = aj[j]+cstart;
818596b8d2eSBarry Smith 
819187ce0cbSSatish Balay       key = row*Nbs + col + 1;
820c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8210bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
8220bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8230bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8240bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
825596b8d2eSBarry Smith           break;
826187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
827187ce0cbSSatish Balay         } else {
828187ce0cbSSatish Balay           ct++;
829187ce0cbSSatish Balay #endif
830596b8d2eSBarry Smith         }
831187ce0cbSSatish Balay       }
832187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
833187ce0cbSSatish Balay       if (k> max) max = k;
834187ce0cbSSatish Balay #endif
835596b8d2eSBarry Smith     }
836596b8d2eSBarry Smith   }
837596b8d2eSBarry Smith   /* Loop Over B */
8380bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
839596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
8400bdbc534SSatish Balay       row = i+rstart;
8410bdbc534SSatish Balay       col = garray[bj[j]];
842187ce0cbSSatish Balay       key = row*Nbs + col + 1;
843c2760754SSatish Balay       h1  = HASH(size,key,tmp);
8440bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
8450bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
8460bdbc534SSatish Balay           HT[(h1+k)%size] = key;
8470bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
848596b8d2eSBarry Smith           break;
849187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
850187ce0cbSSatish Balay         } else {
851187ce0cbSSatish Balay           ct++;
852187ce0cbSSatish Balay #endif
853596b8d2eSBarry Smith         }
854187ce0cbSSatish Balay       }
855187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
856187ce0cbSSatish Balay       if (k> max) max = k;
857187ce0cbSSatish Balay #endif
858596b8d2eSBarry Smith     }
859596b8d2eSBarry Smith   }
860596b8d2eSBarry Smith 
861596b8d2eSBarry Smith   /* Print Summary */
862187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
863c2760754SSatish Balay   for ( i=0,j=0; i<size; i++)
864596b8d2eSBarry Smith     if (HT[i]) {j++;}
865187ce0cbSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
866187ce0cbSSatish Balay            (j== 0)? 0.0:((double)(ct+j))/j,max);
867187ce0cbSSatish Balay #endif
8683a40ed3dSBarry Smith   PetscFunctionReturn(0);
869596b8d2eSBarry Smith }
87057b952d6SSatish Balay 
8715615d1e5SSatish Balay #undef __FUNC__
8725615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
873ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
87457b952d6SSatish Balay {
87557b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
87657b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
87757b952d6SSatish Balay   int         imdex,nrecvs = baij->nrecvs, count = nrecvs, i, n, ierr;
878b7029e64SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
87957b952d6SSatish Balay   Scalar      *values,val;
880e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
88157b952d6SSatish Balay 
882d64ed03dSBarry Smith   PetscFunctionBegin;
88357b952d6SSatish Balay   /*  wait on receives */
88457b952d6SSatish Balay   while (count) {
885ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
88657b952d6SSatish Balay     /* unpack receives into our local space */
88757b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
888ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
88957b952d6SSatish Balay     n = n/3;
89057b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
89157b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
89257b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
89357b952d6SSatish Balay       val = values[3*i+2];
89457b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
89557b952d6SSatish Balay         col -= baij->cstart*bs;
8966fd7127cSSatish Balay         ierr = MatSetValues(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
897d64ed03dSBarry Smith       } else {
89857b952d6SSatish Balay         if (mat->was_assembled) {
899905e6a2fSBarry Smith           if (!baij->colmap) {
900905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
901905e6a2fSBarry Smith           }
902a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
90357b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
90457b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
90557b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
90657b952d6SSatish Balay           }
90757b952d6SSatish Balay         }
9086fd7127cSSatish Balay         ierr = MatSetValues(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
90957b952d6SSatish Balay       }
91057b952d6SSatish Balay     }
91157b952d6SSatish Balay     count--;
91257b952d6SSatish Balay   }
913570da906SBarry Smith   if (baij->recv_waits) PetscFree(baij->recv_waits);
914570da906SBarry Smith   if (baij->rvalues)    PetscFree(baij->rvalues);
91557b952d6SSatish Balay 
91657b952d6SSatish Balay   /* wait on sends */
91757b952d6SSatish Balay   if (baij->nsends) {
918d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
919ca161407SBarry Smith     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
92057b952d6SSatish Balay     PetscFree(send_status);
92157b952d6SSatish Balay   }
922570da906SBarry Smith   if (baij->send_waits) PetscFree(baij->send_waits);
923570da906SBarry Smith   if (baij->svalues)    PetscFree(baij->svalues);
92457b952d6SSatish Balay 
92557b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
92657b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
92757b952d6SSatish Balay 
92857b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
92957b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
9306e713f22SBarry Smith   /*
9316e713f22SBarry Smith      if nonzero structure of submatrix B cannot change then we know that
9326e713f22SBarry Smith      no processor disassembled thus we can skip this stuff
9336e713f22SBarry Smith   */
9346e713f22SBarry Smith   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
935ca161407SBarry Smith     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
93657b952d6SSatish Balay     if (mat->was_assembled && !other_disassembled) {
93757b952d6SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
93857b952d6SSatish Balay     }
9396e713f22SBarry Smith   }
94057b952d6SSatish Balay 
9416d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
94257b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
94357b952d6SSatish Balay   }
94457b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
94557b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
94657b952d6SSatish Balay 
947187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
948187ce0cbSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
949187ce0cbSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
950187ce0cbSSatish Balay              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
951187ce0cbSSatish Balay     baij->ht_total_ct  = 0;
952187ce0cbSSatish Balay     baij->ht_insert_ct = 0;
953187ce0cbSSatish Balay   }
954187ce0cbSSatish Balay #endif
955133cdb44SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
956133cdb44SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
957f830108cSBarry Smith     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
958f830108cSBarry Smith     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
959bd7f49f5SSatish Balay   }
960187ce0cbSSatish Balay 
96157b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
9623a40ed3dSBarry Smith   PetscFunctionReturn(0);
96357b952d6SSatish Balay }
96457b952d6SSatish Balay 
9655615d1e5SSatish Balay #undef __FUNC__
9665615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
96757b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
96857b952d6SSatish Balay {
96957b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
97057b952d6SSatish Balay   int          ierr;
97157b952d6SSatish Balay 
972d64ed03dSBarry Smith   PetscFunctionBegin;
97357b952d6SSatish Balay   if (baij->size == 1) {
97457b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
975a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
9763a40ed3dSBarry Smith   PetscFunctionReturn(0);
97757b952d6SSatish Balay }
97857b952d6SSatish Balay 
9795615d1e5SSatish Balay #undef __FUNC__
9805615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworMatlab"
98157b952d6SSatish Balay static int MatView_MPIBAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
98257b952d6SSatish Balay {
98357b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
98477ed5343SBarry Smith   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
98557b952d6SSatish Balay   FILE         *fd;
98657b952d6SSatish Balay   ViewerType   vtype;
98757b952d6SSatish Balay 
988d64ed03dSBarry Smith   PetscFunctionBegin;
98957b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
9903f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
99157b952d6SSatish Balay     ierr = ViewerGetFormat(viewer,&format);
992639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
9934e220ebcSLois Curfman McInnes       MatInfo info;
99457b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
99557b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
9964e220ebcSLois Curfman McInnes       ierr = MatGetInfo(mat,MAT_LOCAL,&info);
99757b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
99857b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
9994e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
10004e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
10014e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
10024e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
10034e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
10044e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
100557b952d6SSatish Balay       fflush(fd);
100657b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
100757b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
10083a40ed3dSBarry Smith       PetscFunctionReturn(0);
1009d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1010bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
10113a40ed3dSBarry Smith       PetscFunctionReturn(0);
101257b952d6SSatish Balay     }
101357b952d6SSatish Balay   }
101457b952d6SSatish Balay 
10153f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
101657b952d6SSatish Balay     Draw       draw;
101757b952d6SSatish Balay     PetscTruth isnull;
101877ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
10193a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
102057b952d6SSatish Balay   }
102157b952d6SSatish Balay 
102257b952d6SSatish Balay   if (size == 1) {
102357b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1024d64ed03dSBarry Smith   } else {
102557b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
102657b952d6SSatish Balay     Mat         A;
102757b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
102840011551SBarry Smith     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
102957b952d6SSatish Balay     int         mbs = baij->mbs;
103057b952d6SSatish Balay     Scalar      *a;
103157b952d6SSatish Balay 
103257b952d6SSatish Balay     if (!rank) {
103355843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1034d64ed03dSBarry Smith     } else {
103555843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
103657b952d6SSatish Balay     }
103757b952d6SSatish Balay     PLogObjectParent(mat,A);
103857b952d6SSatish Balay 
103957b952d6SSatish Balay     /* copy over the A part */
104057b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->A->data;
104157b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
104257b952d6SSatish Balay     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
104357b952d6SSatish Balay 
104457b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
104557b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
104657b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
104757b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
104857b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
104957b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1050cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1051cee3aa6bSSatish Balay           col++; a += bs;
105257b952d6SSatish Balay         }
105357b952d6SSatish Balay       }
105457b952d6SSatish Balay     }
105557b952d6SSatish Balay     /* copy over the B part */
105657b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->B->data;
105757b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
105857b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
105957b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
106057b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
106157b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
106257b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
106357b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1064cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1065cee3aa6bSSatish Balay           col++; a += bs;
106657b952d6SSatish Balay         }
106757b952d6SSatish Balay       }
106857b952d6SSatish Balay     }
106957b952d6SSatish Balay     PetscFree(rvals);
10706d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10716d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
107255843e3eSBarry Smith     /*
107355843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
107455843e3eSBarry Smith        synchronized across all processors that share the Draw object
107555843e3eSBarry Smith     */
10763f1db9ecSBarry Smith     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
107757b952d6SSatish Balay       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
107857b952d6SSatish Balay     }
107957b952d6SSatish Balay     ierr = MatDestroy(A); CHKERRQ(ierr);
108057b952d6SSatish Balay   }
10813a40ed3dSBarry Smith   PetscFunctionReturn(0);
108257b952d6SSatish Balay }
108357b952d6SSatish Balay 
108457b952d6SSatish Balay 
108557b952d6SSatish Balay 
10865615d1e5SSatish Balay #undef __FUNC__
10875615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
1088e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
108957b952d6SSatish Balay {
109057b952d6SSatish Balay   int         ierr;
109157b952d6SSatish Balay   ViewerType  vtype;
109257b952d6SSatish Balay 
1093d64ed03dSBarry Smith   PetscFunctionBegin;
109457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
10953f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
10963f1db9ecSBarry Smith       PetscTypeCompare(vtype,MATLAB_VIEWER)) {
109757b952d6SSatish Balay     ierr = MatView_MPIBAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
10983f1db9ecSBarry Smith   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
10993a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
11005cd90555SBarry Smith   } else {
11015cd90555SBarry Smith     SETERRQ(1,1,"Viewer type not supported by PETSc object");
110257b952d6SSatish Balay   }
11033a40ed3dSBarry Smith   PetscFunctionReturn(0);
110457b952d6SSatish Balay }
110557b952d6SSatish Balay 
11065615d1e5SSatish Balay #undef __FUNC__
11075615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1108e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
110979bdfe76SSatish Balay {
111079bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
111179bdfe76SSatish Balay   int         ierr;
111279bdfe76SSatish Balay 
1113d64ed03dSBarry Smith   PetscFunctionBegin;
111498dd23e9SBarry Smith   if (--mat->refct > 0) PetscFunctionReturn(0);
111598dd23e9SBarry Smith 
111698dd23e9SBarry Smith   if (mat->mapping) {
111798dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
111898dd23e9SBarry Smith   }
111998dd23e9SBarry Smith   if (mat->bmapping) {
112098dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
112198dd23e9SBarry Smith   }
112261b13de0SBarry Smith   if (mat->rmap) {
112361b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
112461b13de0SBarry Smith   }
112561b13de0SBarry Smith   if (mat->cmap) {
112661b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
112761b13de0SBarry Smith   }
11283a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
1129e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
113079bdfe76SSatish Balay #endif
113179bdfe76SSatish Balay 
113283e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
113379bdfe76SSatish Balay   PetscFree(baij->rowners);
113479bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
113579bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
113679bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
113779bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
113879bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
113979bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
114079bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
114130793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
1142b9e4cc15SSatish Balay   if (baij->hd) PetscFree(baij->hd);
114379bdfe76SSatish Balay   PetscFree(baij);
114479bdfe76SSatish Balay   PLogObjectDestroy(mat);
114579bdfe76SSatish Balay   PetscHeaderDestroy(mat);
11463a40ed3dSBarry Smith   PetscFunctionReturn(0);
114779bdfe76SSatish Balay }
114879bdfe76SSatish Balay 
11495615d1e5SSatish Balay #undef __FUNC__
11505615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1151ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1152cee3aa6bSSatish Balay {
1153cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
115447b4a8eaSLois Curfman McInnes   int         ierr, nt;
1155cee3aa6bSSatish Balay 
1156d64ed03dSBarry Smith   PetscFunctionBegin;
1157e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
115847b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1159a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
116047b4a8eaSLois Curfman McInnes   }
1161e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
116247b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1163a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
116447b4a8eaSLois Curfman McInnes   }
116543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1166f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
116743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1168f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
116943a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
11703a40ed3dSBarry Smith   PetscFunctionReturn(0);
1171cee3aa6bSSatish Balay }
1172cee3aa6bSSatish Balay 
11735615d1e5SSatish Balay #undef __FUNC__
11745615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1175ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1176cee3aa6bSSatish Balay {
1177cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1178cee3aa6bSSatish Balay   int        ierr;
1179d64ed03dSBarry Smith 
1180d64ed03dSBarry Smith   PetscFunctionBegin;
118143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1182f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
118343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1184f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
11853a40ed3dSBarry Smith   PetscFunctionReturn(0);
1186cee3aa6bSSatish Balay }
1187cee3aa6bSSatish Balay 
11885615d1e5SSatish Balay #undef __FUNC__
11895615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1190ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1191cee3aa6bSSatish Balay {
1192cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1193cee3aa6bSSatish Balay   int         ierr;
1194cee3aa6bSSatish Balay 
1195d64ed03dSBarry Smith   PetscFunctionBegin;
1196cee3aa6bSSatish Balay   /* do nondiagonal part */
1197f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1198cee3aa6bSSatish Balay   /* send it on its way */
1199537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1200cee3aa6bSSatish Balay   /* do local part */
1201f830108cSBarry Smith   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1202cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1203cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1204cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1205639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
12063a40ed3dSBarry Smith   PetscFunctionReturn(0);
1207cee3aa6bSSatish Balay }
1208cee3aa6bSSatish Balay 
12095615d1e5SSatish Balay #undef __FUNC__
12105615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1211ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1212cee3aa6bSSatish Balay {
1213cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1214cee3aa6bSSatish Balay   int         ierr;
1215cee3aa6bSSatish Balay 
1216d64ed03dSBarry Smith   PetscFunctionBegin;
1217cee3aa6bSSatish Balay   /* do nondiagonal part */
1218f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1219cee3aa6bSSatish Balay   /* send it on its way */
1220537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1221cee3aa6bSSatish Balay   /* do local part */
1222f830108cSBarry Smith   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1223cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1224cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1225cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1226537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
12273a40ed3dSBarry Smith   PetscFunctionReturn(0);
1228cee3aa6bSSatish Balay }
1229cee3aa6bSSatish Balay 
1230cee3aa6bSSatish Balay /*
1231cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1232cee3aa6bSSatish Balay    diagonal block
1233cee3aa6bSSatish Balay */
12345615d1e5SSatish Balay #undef __FUNC__
12355615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1236ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1237cee3aa6bSSatish Balay {
1238cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
12393a40ed3dSBarry Smith   int         ierr;
1240d64ed03dSBarry Smith 
1241d64ed03dSBarry Smith   PetscFunctionBegin;
1242a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
12433a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
12443a40ed3dSBarry Smith   PetscFunctionReturn(0);
1245cee3aa6bSSatish Balay }
1246cee3aa6bSSatish Balay 
12475615d1e5SSatish Balay #undef __FUNC__
12485615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1249ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1250cee3aa6bSSatish Balay {
1251cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1252cee3aa6bSSatish Balay   int         ierr;
1253d64ed03dSBarry Smith 
1254d64ed03dSBarry Smith   PetscFunctionBegin;
1255cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1256cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
12573a40ed3dSBarry Smith   PetscFunctionReturn(0);
1258cee3aa6bSSatish Balay }
1259026e39d0SSatish Balay 
12605615d1e5SSatish Balay #undef __FUNC__
12615615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1262ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
126357b952d6SSatish Balay {
126457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1265d64ed03dSBarry Smith 
1266d64ed03dSBarry Smith   PetscFunctionBegin;
1267bd7f49f5SSatish Balay   if (m) *m = mat->M;
1268bd7f49f5SSatish Balay   if (n) *n = mat->N;
12693a40ed3dSBarry Smith   PetscFunctionReturn(0);
127057b952d6SSatish Balay }
127157b952d6SSatish Balay 
12725615d1e5SSatish Balay #undef __FUNC__
12735615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1274ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
127557b952d6SSatish Balay {
127657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1277d64ed03dSBarry Smith 
1278d64ed03dSBarry Smith   PetscFunctionBegin;
1279f830108cSBarry Smith   *m = mat->m; *n = mat->n;
12803a40ed3dSBarry Smith   PetscFunctionReturn(0);
128157b952d6SSatish Balay }
128257b952d6SSatish Balay 
12835615d1e5SSatish Balay #undef __FUNC__
12845615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1285ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
128657b952d6SSatish Balay {
128757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1288d64ed03dSBarry Smith 
1289d64ed03dSBarry Smith   PetscFunctionBegin;
129057b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
12913a40ed3dSBarry Smith   PetscFunctionReturn(0);
129257b952d6SSatish Balay }
129357b952d6SSatish Balay 
1294acdf5bf4SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1295acdf5bf4SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
1296acdf5bf4SSatish Balay 
12975615d1e5SSatish Balay #undef __FUNC__
12985615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1299acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1300acdf5bf4SSatish Balay {
1301acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1302acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1303acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1304d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1305d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1306acdf5bf4SSatish Balay 
1307d64ed03dSBarry Smith   PetscFunctionBegin;
1308a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1309acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1310acdf5bf4SSatish Balay 
1311acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1312acdf5bf4SSatish Balay     /*
1313acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1314acdf5bf4SSatish Balay     */
1315acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1316bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1317bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1318acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1319acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1320acdf5bf4SSatish Balay     }
1321acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1322acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1323acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1324acdf5bf4SSatish Balay   }
1325acdf5bf4SSatish Balay 
1326a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1327d9d09a02SSatish Balay   lrow = row - brstart;
1328acdf5bf4SSatish Balay 
1329acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1330acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1331acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1332f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1333f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1334acdf5bf4SSatish Balay   nztot = nzA + nzB;
1335acdf5bf4SSatish Balay 
1336acdf5bf4SSatish Balay   cmap  = mat->garray;
1337acdf5bf4SSatish Balay   if (v  || idx) {
1338acdf5bf4SSatish Balay     if (nztot) {
1339acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1340acdf5bf4SSatish Balay       int imark = -1;
1341acdf5bf4SSatish Balay       if (v) {
1342acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1343acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1344d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1345acdf5bf4SSatish Balay           else break;
1346acdf5bf4SSatish Balay         }
1347acdf5bf4SSatish Balay         imark = i;
1348acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1349acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1350acdf5bf4SSatish Balay       }
1351acdf5bf4SSatish Balay       if (idx) {
1352acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1353acdf5bf4SSatish Balay         if (imark > -1) {
1354acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1355bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1356acdf5bf4SSatish Balay           }
1357acdf5bf4SSatish Balay         } else {
1358acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1359d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1360d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1361acdf5bf4SSatish Balay             else break;
1362acdf5bf4SSatish Balay           }
1363acdf5bf4SSatish Balay           imark = i;
1364acdf5bf4SSatish Balay         }
1365d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1366d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1367acdf5bf4SSatish Balay       }
1368d64ed03dSBarry Smith     } else {
1369d212a18eSSatish Balay       if (idx) *idx = 0;
1370d212a18eSSatish Balay       if (v)   *v   = 0;
1371d212a18eSSatish Balay     }
1372acdf5bf4SSatish Balay   }
1373acdf5bf4SSatish Balay   *nz = nztot;
1374f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1375f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
13763a40ed3dSBarry Smith   PetscFunctionReturn(0);
1377acdf5bf4SSatish Balay }
1378acdf5bf4SSatish Balay 
13795615d1e5SSatish Balay #undef __FUNC__
13805615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1381acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1382acdf5bf4SSatish Balay {
1383acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1384d64ed03dSBarry Smith 
1385d64ed03dSBarry Smith   PetscFunctionBegin;
1386acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1387a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1388acdf5bf4SSatish Balay   }
1389acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
13903a40ed3dSBarry Smith   PetscFunctionReturn(0);
1391acdf5bf4SSatish Balay }
1392acdf5bf4SSatish Balay 
13935615d1e5SSatish Balay #undef __FUNC__
13945615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1395ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
13965a838052SSatish Balay {
13975a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1398d64ed03dSBarry Smith 
1399d64ed03dSBarry Smith   PetscFunctionBegin;
14005a838052SSatish Balay   *bs = baij->bs;
14013a40ed3dSBarry Smith   PetscFunctionReturn(0);
14025a838052SSatish Balay }
14035a838052SSatish Balay 
14045615d1e5SSatish Balay #undef __FUNC__
14055615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1406ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
140758667388SSatish Balay {
140858667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
140958667388SSatish Balay   int         ierr;
1410d64ed03dSBarry Smith 
1411d64ed03dSBarry Smith   PetscFunctionBegin;
141258667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
141358667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
14143a40ed3dSBarry Smith   PetscFunctionReturn(0);
141558667388SSatish Balay }
14160ac07820SSatish Balay 
14175615d1e5SSatish Balay #undef __FUNC__
14185615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1419ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
14200ac07820SSatish Balay {
14214e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
14224e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
14237d57db60SLois Curfman McInnes   int         ierr;
14247d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
14250ac07820SSatish Balay 
1426d64ed03dSBarry Smith   PetscFunctionBegin;
14274e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
14284e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
14290e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1430de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
14314e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
14320e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1433de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
14340ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14354e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14364e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14374e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14384e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14394e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14400ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1441f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
14424e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14434e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14444e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14454e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14464e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14470ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1448f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
14494e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14504e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14514e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14524e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14534e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14540ac07820SSatish Balay   }
14555a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
14565a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
14575a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
14585a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
14594e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14604e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14614e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14623a40ed3dSBarry Smith   PetscFunctionReturn(0);
14630ac07820SSatish Balay }
14640ac07820SSatish Balay 
14655615d1e5SSatish Balay #undef __FUNC__
14665615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1467ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
146858667388SSatish Balay {
146958667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
147058667388SSatish Balay 
1471d64ed03dSBarry Smith   PetscFunctionBegin;
147258667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
147358667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
14746da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1475c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
147696854ed6SLois Curfman McInnes       op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
1477c2653b3dSLois Curfman McInnes       op == MAT_NEW_NONZERO_LOCATION_ERROR) {
1478b1fbbac0SLois Curfman McInnes         MatSetOption(a->A,op);
1479b1fbbac0SLois Curfman McInnes         MatSetOption(a->B,op);
1480b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1481aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
148258667388SSatish Balay         MatSetOption(a->A,op);
148358667388SSatish Balay         MatSetOption(a->B,op);
1484b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
14856da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
148658667388SSatish Balay              op == MAT_SYMMETRIC ||
148758667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1488b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
1489b51ba29fSSatish Balay              op == MAT_USE_HASH_TABLE)
149058667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
149158667388SSatish Balay   else if (op == MAT_COLUMN_ORIENTED) {
149258667388SSatish Balay     a->roworiented = 0;
149358667388SSatish Balay     MatSetOption(a->A,op);
149458667388SSatish Balay     MatSetOption(a->B,op);
14952b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
149690f02eecSBarry Smith     a->donotstash = 1;
1497d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1498d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1499133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1500133cdb44SSatish Balay     a->ht_flag = 1;
1501d64ed03dSBarry Smith   } else {
1502d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1503d64ed03dSBarry Smith   }
15043a40ed3dSBarry Smith   PetscFunctionReturn(0);
150558667388SSatish Balay }
150658667388SSatish Balay 
15075615d1e5SSatish Balay #undef __FUNC__
15085615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1509ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
15100ac07820SSatish Balay {
15110ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
15120ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
15130ac07820SSatish Balay   Mat        B;
151440011551SBarry Smith   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
15150ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
15160ac07820SSatish Balay   Scalar     *a;
15170ac07820SSatish Balay 
1518d64ed03dSBarry Smith   PetscFunctionBegin;
1519a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
15200ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
15210ac07820SSatish Balay   CHKERRQ(ierr);
15220ac07820SSatish Balay 
15230ac07820SSatish Balay   /* copy over the A part */
15240ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
15250ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15260ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
15270ac07820SSatish Balay 
15280ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
15290ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15300ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
15310ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
15320ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
15330ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
15340ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15350ac07820SSatish Balay         col++; a += bs;
15360ac07820SSatish Balay       }
15370ac07820SSatish Balay     }
15380ac07820SSatish Balay   }
15390ac07820SSatish Balay   /* copy over the B part */
15400ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
15410ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
15420ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
15430ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
15440ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
15450ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
15460ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
15470ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
15480ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
15490ac07820SSatish Balay         col++; a += bs;
15500ac07820SSatish Balay       }
15510ac07820SSatish Balay     }
15520ac07820SSatish Balay   }
15530ac07820SSatish Balay   PetscFree(rvals);
15540ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15550ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15560ac07820SSatish Balay 
15570ac07820SSatish Balay   if (matout != PETSC_NULL) {
15580ac07820SSatish Balay     *matout = B;
15590ac07820SSatish Balay   } else {
1560f830108cSBarry Smith     PetscOps *Abops;
1561cc2dc46cSBarry Smith     MatOps   Aops;
1562f830108cSBarry Smith 
15630ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
15640ac07820SSatish Balay     PetscFree(baij->rowners);
15650ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
15660ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
15670ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
15680ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
15690ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
15700ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
15710ac07820SSatish Balay     PetscFree(baij);
1572f830108cSBarry Smith 
1573f830108cSBarry Smith     /*
1574f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1575f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1576f830108cSBarry Smith       else from C.
1577f830108cSBarry Smith     */
1578f830108cSBarry Smith     Abops = A->bops;
1579f830108cSBarry Smith     Aops  = A->ops;
1580f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1581f830108cSBarry Smith     A->bops = Abops;
1582f830108cSBarry Smith     A->ops  = Aops;
1583f830108cSBarry Smith 
15840ac07820SSatish Balay     PetscHeaderDestroy(B);
15850ac07820SSatish Balay   }
15863a40ed3dSBarry Smith   PetscFunctionReturn(0);
15870ac07820SSatish Balay }
15880e95ebc0SSatish Balay 
15895615d1e5SSatish Balay #undef __FUNC__
15905615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
15910e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
15920e95ebc0SSatish Balay {
15930e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
15940e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
15950e95ebc0SSatish Balay   int ierr,s1,s2,s3;
15960e95ebc0SSatish Balay 
1597d64ed03dSBarry Smith   PetscFunctionBegin;
15980e95ebc0SSatish Balay   if (ll)  {
15990e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
16000e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1601a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
16020e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
16030e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
16040e95ebc0SSatish Balay   }
1605a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
16063a40ed3dSBarry Smith   PetscFunctionReturn(0);
16070e95ebc0SSatish Balay }
16080e95ebc0SSatish Balay 
16095615d1e5SSatish Balay #undef __FUNC__
16105615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1611ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
16120ac07820SSatish Balay {
16130ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
16140ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1615a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
16160ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
16170ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1618a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
16190ac07820SSatish Balay   MPI_Comm       comm = A->comm;
16200ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
16210ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
16220ac07820SSatish Balay   IS             istmp;
162372dacd9aSBarry Smith   PetscTruth     localdiag;
16240ac07820SSatish Balay 
1625d64ed03dSBarry Smith   PetscFunctionBegin;
16260ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
16270ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
16280ac07820SSatish Balay 
16290ac07820SSatish Balay   /*  first count number of contributors to each processor */
16300ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
16310ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
16320ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
16330ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16340ac07820SSatish Balay     idx = rows[i];
16350ac07820SSatish Balay     found = 0;
16360ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
16370ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
16380ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
16390ac07820SSatish Balay       }
16400ac07820SSatish Balay     }
1641a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
16420ac07820SSatish Balay   }
16430ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
16440ac07820SSatish Balay 
16450ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
16460ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1647ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
16480ac07820SSatish Balay   nrecvs = work[rank];
1649ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
16500ac07820SSatish Balay   nmax   = work[rank];
16510ac07820SSatish Balay   PetscFree(work);
16520ac07820SSatish Balay 
16530ac07820SSatish Balay   /* post receives:   */
1654d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1655d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
16560ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1657ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16580ac07820SSatish Balay   }
16590ac07820SSatish Balay 
16600ac07820SSatish Balay   /* do sends:
16610ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
16620ac07820SSatish Balay      the ith processor
16630ac07820SSatish Balay   */
16640ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1665ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
16660ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
16670ac07820SSatish Balay   starts[0] = 0;
16680ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16690ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16700ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
16710ac07820SSatish Balay   }
16720ac07820SSatish Balay   ISRestoreIndices(is,&rows);
16730ac07820SSatish Balay 
16740ac07820SSatish Balay   starts[0] = 0;
16750ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16760ac07820SSatish Balay   count = 0;
16770ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
16780ac07820SSatish Balay     if (procs[i]) {
1679ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
16800ac07820SSatish Balay     }
16810ac07820SSatish Balay   }
16820ac07820SSatish Balay   PetscFree(starts);
16830ac07820SSatish Balay 
16840ac07820SSatish Balay   base = owners[rank]*bs;
16850ac07820SSatish Balay 
16860ac07820SSatish Balay   /*  wait on receives */
16870ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
16880ac07820SSatish Balay   source = lens + nrecvs;
16890ac07820SSatish Balay   count  = nrecvs; slen = 0;
16900ac07820SSatish Balay   while (count) {
1691ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
16920ac07820SSatish Balay     /* unpack receives into our local space */
1693ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
16940ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
16950ac07820SSatish Balay     lens[imdex]  = n;
16960ac07820SSatish Balay     slen += n;
16970ac07820SSatish Balay     count--;
16980ac07820SSatish Balay   }
16990ac07820SSatish Balay   PetscFree(recv_waits);
17000ac07820SSatish Balay 
17010ac07820SSatish Balay   /* move the data into the send scatter */
17020ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
17030ac07820SSatish Balay   count = 0;
17040ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
17050ac07820SSatish Balay     values = rvalues + i*nmax;
17060ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
17070ac07820SSatish Balay       lrows[count++] = values[j] - base;
17080ac07820SSatish Balay     }
17090ac07820SSatish Balay   }
17100ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
17110ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
17120ac07820SSatish Balay 
17130ac07820SSatish Balay   /* actually zap the local rows */
1714029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
17150ac07820SSatish Balay   PLogObjectParent(A,istmp);
1716a07cd24cSSatish Balay 
171772dacd9aSBarry Smith   /*
171872dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
171972dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
172072dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
172172dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
172272dacd9aSBarry Smith 
172372dacd9aSBarry Smith        Contributed by: Mathew Knepley
172472dacd9aSBarry Smith   */
172572dacd9aSBarry Smith   localdiag = PETSC_FALSE;
172672dacd9aSBarry Smith   if (diag && (l->A->M == l->A->N)) {
172772dacd9aSBarry Smith     localdiag = PETSC_TRUE;
172872dacd9aSBarry Smith     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
172972dacd9aSBarry Smith   } else {
1730a07cd24cSSatish Balay     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
173172dacd9aSBarry Smith   }
17320ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
17330ac07820SSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
17340ac07820SSatish Balay 
173572dacd9aSBarry Smith   if (diag && (localdiag == PETSC_FALSE)) {
1736a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1737a07cd24cSSatish Balay       row = lrows[i] + rstart_bs;
1738a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); CHKERRQ(ierr);
1739a07cd24cSSatish Balay     }
1740a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1741a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
1742a07cd24cSSatish Balay   }
1743a07cd24cSSatish Balay   PetscFree(lrows);
1744a07cd24cSSatish Balay 
17450ac07820SSatish Balay   /* wait on sends */
17460ac07820SSatish Balay   if (nsends) {
1747d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1748ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
17490ac07820SSatish Balay     PetscFree(send_status);
17500ac07820SSatish Balay   }
17510ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
17520ac07820SSatish Balay 
17533a40ed3dSBarry Smith   PetscFunctionReturn(0);
17540ac07820SSatish Balay }
175572dacd9aSBarry Smith 
1756ba4ca20aSSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
17575615d1e5SSatish Balay #undef __FUNC__
17585615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1759ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1760ba4ca20aSSatish Balay {
1761ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
176225fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1763133cdb44SSatish Balay   static int  called = 0;
17643a40ed3dSBarry Smith   int         ierr;
1765ba4ca20aSSatish Balay 
1766d64ed03dSBarry Smith   PetscFunctionBegin;
17673a40ed3dSBarry Smith   if (!a->rank) {
17683a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
176925fdafccSSatish Balay   }
177025fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1771133cdb44SSatish Balay   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1772133cdb44SSatish Balay   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
17733a40ed3dSBarry Smith   PetscFunctionReturn(0);
1774ba4ca20aSSatish Balay }
17750ac07820SSatish Balay 
17765615d1e5SSatish Balay #undef __FUNC__
17775615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1778ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1779bb5a7306SBarry Smith {
1780bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1781bb5a7306SBarry Smith   int         ierr;
1782d64ed03dSBarry Smith 
1783d64ed03dSBarry Smith   PetscFunctionBegin;
1784bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
17853a40ed3dSBarry Smith   PetscFunctionReturn(0);
1786bb5a7306SBarry Smith }
1787bb5a7306SBarry Smith 
17882e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
17890ac07820SSatish Balay 
179079bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1791cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1792cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1793cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1794cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1795cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1796cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
1797cc2dc46cSBarry Smith   MatMultTrans_MPIBAIJ,
1798cc2dc46cSBarry Smith   MatMultTransAdd_MPIBAIJ,
1799cc2dc46cSBarry Smith   0,
1800cc2dc46cSBarry Smith   0,
1801cc2dc46cSBarry Smith   0,
1802cc2dc46cSBarry Smith   0,
1803cc2dc46cSBarry Smith   0,
1804cc2dc46cSBarry Smith   0,
1805cc2dc46cSBarry Smith   0,
1806cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1807cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
1808cc2dc46cSBarry Smith   0,
1809cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1810cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1811cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1812cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1813cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1814cc2dc46cSBarry Smith   0,
1815cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1816cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1817cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1818cc2dc46cSBarry Smith   0,
1819cc2dc46cSBarry Smith   0,
1820cc2dc46cSBarry Smith   0,
1821cc2dc46cSBarry Smith   0,
1822cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1823cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1824cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1825cc2dc46cSBarry Smith   0,
1826cc2dc46cSBarry Smith   0,
1827cc2dc46cSBarry Smith   0,
1828cc2dc46cSBarry Smith   0,
18292e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1830cc2dc46cSBarry Smith   0,
1831cc2dc46cSBarry Smith   0,
1832cc2dc46cSBarry Smith   0,
1833cc2dc46cSBarry Smith   0,
1834cc2dc46cSBarry Smith   0,
1835cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1836cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1837cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1838cc2dc46cSBarry Smith   0,
1839cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1840cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1841cc2dc46cSBarry Smith   0,
1842cc2dc46cSBarry Smith   0,
1843cc2dc46cSBarry Smith   0,
1844cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1845cc2dc46cSBarry Smith   0,
1846cc2dc46cSBarry Smith   0,
1847cc2dc46cSBarry Smith   0,
1848cc2dc46cSBarry Smith   0,
1849cc2dc46cSBarry Smith   0,
1850cc2dc46cSBarry Smith   0,
1851cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1852cc2dc46cSBarry Smith   0,
1853cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1854cc2dc46cSBarry Smith   0,
1855cc2dc46cSBarry Smith   0,
1856cc2dc46cSBarry Smith   0,
1857cc2dc46cSBarry Smith   MatGetMaps_Petsc};
185879bdfe76SSatish Balay 
1859*e18c124aSSatish Balay #include "pc.h"
1860*e18c124aSSatish Balay EXTERN_C_BEGIN
1861*e18c124aSSatish Balay extern int PCSetUp_BJacobi_BAIJ(PC);
1862*e18c124aSSatish Balay EXTERN_C_END
186379bdfe76SSatish Balay 
18645615d1e5SSatish Balay #undef __FUNC__
18655615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
186679bdfe76SSatish Balay /*@C
186779bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
186879bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
186979bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
187079bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
187179bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
187279bdfe76SSatish Balay 
1873db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1874db81eaa0SLois Curfman McInnes 
187579bdfe76SSatish Balay    Input Parameters:
1876db81eaa0SLois Curfman McInnes +  comm - MPI communicator
187779bdfe76SSatish Balay .  bs   - size of blockk
187879bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
187992e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
188092e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
188192e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
188292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
188392e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1884be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1885be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
188679bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
188779bdfe76SSatish Balay            submatrix  (same for all local rows)
188892e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
188992e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
1890db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
189192e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
189279bdfe76SSatish Balay            submatrix (same for all local rows).
1893db81eaa0SLois Curfman McInnes -  o_nzz - array containing the number of nonzeros in the various block rows of the
189492e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
189592e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
189679bdfe76SSatish Balay 
189779bdfe76SSatish Balay    Output Parameter:
189879bdfe76SSatish Balay .  A - the matrix
189979bdfe76SSatish Balay 
1900db81eaa0SLois Curfman McInnes    Options Database Keys:
1901db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
1902db81eaa0SLois Curfman McInnes                      block calculations (much slower)
1903db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
1904494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1905494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
19063ffaccefSLois Curfman McInnes 
1907b259b22eSLois Curfman McInnes    Notes:
190879bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
190979bdfe76SSatish Balay    (possibly both).
191079bdfe76SSatish Balay 
1911be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1912be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
1913be79a94dSBarry Smith 
191479bdfe76SSatish Balay    Storage Information:
191579bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
191679bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
191779bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
191879bdfe76SSatish Balay    local matrix (a rectangular submatrix).
191979bdfe76SSatish Balay 
192079bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
192179bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
192279bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
192379bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
192479bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
192579bdfe76SSatish Balay 
192679bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
192779bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
192879bdfe76SSatish Balay 
1929db81eaa0SLois Curfman McInnes .vb
1930db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
1931db81eaa0SLois Curfman McInnes           -------------------
1932db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
1933db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
1934db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
1935db81eaa0SLois Curfman McInnes           -------------------
1936db81eaa0SLois Curfman McInnes .ve
193779bdfe76SSatish Balay 
193879bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
193979bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
194079bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
194157b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
194279bdfe76SSatish Balay 
1943d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1944d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
194579bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
194692e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
194792e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
19486da5968aSLois Curfman McInnes    matrices.
194979bdfe76SSatish Balay 
195092e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
195179bdfe76SSatish Balay 
1952db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
195379bdfe76SSatish Balay @*/
195479bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
195579bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
195679bdfe76SSatish Balay {
195779bdfe76SSatish Balay   Mat          B;
195879bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1959133cdb44SSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1960a2ab621fSBarry Smith   int          flag1 = 0,flag2 = 0;
196179bdfe76SSatish Balay 
1962d64ed03dSBarry Smith   PetscFunctionBegin;
1963a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
19643914022bSBarry Smith 
19653914022bSBarry Smith   MPI_Comm_size(comm,&size);
1966494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
1967494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1968494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
19693914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
19703914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
19713914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
19723a40ed3dSBarry Smith     PetscFunctionReturn(0);
19733914022bSBarry Smith   }
19743914022bSBarry Smith 
197579bdfe76SSatish Balay   *A = 0;
19763f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
197779bdfe76SSatish Balay   PLogObjectCreate(B);
197879bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
197979bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1980cc2dc46cSBarry Smith   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
19814c50302cSBarry Smith 
1982e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
1983e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
198490f02eecSBarry Smith   B->mapping    = 0;
198579bdfe76SSatish Balay   B->factor     = 0;
198679bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
198779bdfe76SSatish Balay 
1988e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
198979bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
199079bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
199179bdfe76SSatish Balay 
1992d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1993a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1994d64ed03dSBarry Smith   }
1995a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1996a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1997a8c6a408SBarry Smith   }
1998a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1999a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2000a8c6a408SBarry Smith   }
2001cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2002cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
200379bdfe76SSatish Balay 
200479bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
200579bdfe76SSatish Balay     work[0] = m; work[1] = n;
200679bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2007ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
200879bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
200979bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
201079bdfe76SSatish Balay   }
201179bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
201279bdfe76SSatish Balay     Mbs = M/bs;
2013a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
201479bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
201579bdfe76SSatish Balay     m   = mbs*bs;
201679bdfe76SSatish Balay   }
201779bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
201879bdfe76SSatish Balay     Nbs = N/bs;
2019a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
202079bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
202179bdfe76SSatish Balay     n   = nbs*bs;
202279bdfe76SSatish Balay   }
2023a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2024a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2025a8c6a408SBarry Smith   }
202679bdfe76SSatish Balay 
202779bdfe76SSatish Balay   b->m = m; B->m = m;
202879bdfe76SSatish Balay   b->n = n; B->n = n;
202979bdfe76SSatish Balay   b->N = N; B->N = N;
203079bdfe76SSatish Balay   b->M = M; B->M = M;
203179bdfe76SSatish Balay   b->bs  = bs;
203279bdfe76SSatish Balay   b->bs2 = bs*bs;
203379bdfe76SSatish Balay   b->mbs = mbs;
203479bdfe76SSatish Balay   b->nbs = nbs;
203579bdfe76SSatish Balay   b->Mbs = Mbs;
203679bdfe76SSatish Balay   b->Nbs = Nbs;
203779bdfe76SSatish Balay 
2038c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2039c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2040488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2041488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2042c7fcc2eaSBarry Smith 
204379bdfe76SSatish Balay   /* build local table of row and column ownerships */
204479bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
2045f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
20460ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
2047ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
204879bdfe76SSatish Balay   b->rowners[0] = 0;
204979bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
205079bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
205179bdfe76SSatish Balay   }
205279bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
205379bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
20544fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
20554fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
20564fa0d573SSatish Balay 
2057ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
205879bdfe76SSatish Balay   b->cowners[0] = 0;
205979bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
206079bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
206179bdfe76SSatish Balay   }
206279bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
206379bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
20644fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
20654fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
206679bdfe76SSatish Balay 
2067a07cd24cSSatish Balay 
206879bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2069029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
207079bdfe76SSatish Balay   PLogObjectParent(B,b->A);
207179bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2072029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
207379bdfe76SSatish Balay   PLogObjectParent(B,b->B);
207479bdfe76SSatish Balay 
207579bdfe76SSatish Balay   /* build cache for off array entries formed */
207679bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
207790f02eecSBarry Smith   b->donotstash  = 0;
207879bdfe76SSatish Balay   b->colmap      = 0;
207979bdfe76SSatish Balay   b->garray      = 0;
208079bdfe76SSatish Balay   b->roworiented = 1;
208179bdfe76SSatish Balay 
208230793edcSSatish Balay   /* stuff used in block assembly */
208330793edcSSatish Balay   b->barray       = 0;
208430793edcSSatish Balay 
208579bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
208679bdfe76SSatish Balay   b->lvec         = 0;
208779bdfe76SSatish Balay   b->Mvctx        = 0;
208879bdfe76SSatish Balay 
208979bdfe76SSatish Balay   /* stuff for MatGetRow() */
209079bdfe76SSatish Balay   b->rowindices   = 0;
209179bdfe76SSatish Balay   b->rowvalues    = 0;
209279bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
209379bdfe76SSatish Balay 
2094a07cd24cSSatish Balay   /* hash table stuff */
2095a07cd24cSSatish Balay   b->ht           = 0;
2096187ce0cbSSatish Balay   b->hd           = 0;
20970bdbc534SSatish Balay   b->ht_size      = 0;
2098133cdb44SSatish Balay   b->ht_flag      = 0;
209925fdafccSSatish Balay   b->ht_fact      = 0;
2100187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2101187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2102a07cd24cSSatish Balay 
210379bdfe76SSatish Balay   *A = B;
2104133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2105133cdb44SSatish Balay   if (flg) {
2106133cdb44SSatish Balay     double fact = 1.39;
2107133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2108133cdb44SSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2109133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2110133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2111133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2112133cdb44SSatish Balay   }
2113*e18c124aSSatish Balay   ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C",
2114*e18c124aSSatish Balay                                      "PCSetUp_BJacobi_BAIJ",
2115*e18c124aSSatish Balay                                      (void*)PCSetUp_BJacobi_BAIJ);CHKERRQ(ierr);
21163a40ed3dSBarry Smith   PetscFunctionReturn(0);
211779bdfe76SSatish Balay }
2118026e39d0SSatish Balay 
21195615d1e5SSatish Balay #undef __FUNC__
21202e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ"
21212e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
21220ac07820SSatish Balay {
21230ac07820SSatish Balay   Mat         mat;
21240ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
21250ac07820SSatish Balay   int         ierr, len=0, flg;
21260ac07820SSatish Balay 
2127d64ed03dSBarry Smith   PetscFunctionBegin;
21280ac07820SSatish Balay   *newmat       = 0;
21293f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
21300ac07820SSatish Balay   PLogObjectCreate(mat);
21310ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2132cc2dc46cSBarry Smith   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2133e1311b90SBarry Smith   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2134e1311b90SBarry Smith   mat->ops->view       = MatView_MPIBAIJ;
21350ac07820SSatish Balay   mat->factor          = matin->factor;
21360ac07820SSatish Balay   mat->assembled       = PETSC_TRUE;
21370ac07820SSatish Balay 
21380ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
21390ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
21400ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
21410ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
21420ac07820SSatish Balay 
21430ac07820SSatish Balay   a->bs  = oldmat->bs;
21440ac07820SSatish Balay   a->bs2 = oldmat->bs2;
21450ac07820SSatish Balay   a->mbs = oldmat->mbs;
21460ac07820SSatish Balay   a->nbs = oldmat->nbs;
21470ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
21480ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
21490ac07820SSatish Balay 
21500ac07820SSatish Balay   a->rstart       = oldmat->rstart;
21510ac07820SSatish Balay   a->rend         = oldmat->rend;
21520ac07820SSatish Balay   a->cstart       = oldmat->cstart;
21530ac07820SSatish Balay   a->cend         = oldmat->cend;
21540ac07820SSatish Balay   a->size         = oldmat->size;
21550ac07820SSatish Balay   a->rank         = oldmat->rank;
2156e0fa3b82SLois Curfman McInnes   mat->insertmode = NOT_SET_VALUES;
21570ac07820SSatish Balay   a->rowvalues    = 0;
21580ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
215930793edcSSatish Balay   a->barray       = 0;
21600ac07820SSatish Balay 
2161133cdb44SSatish Balay   /* hash table stuff */
2162133cdb44SSatish Balay   a->ht           = 0;
2163133cdb44SSatish Balay   a->hd           = 0;
2164133cdb44SSatish Balay   a->ht_size      = 0;
2165133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
216625fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2167133cdb44SSatish Balay   a->ht_total_ct  = 0;
2168133cdb44SSatish Balay   a->ht_insert_ct = 0;
2169133cdb44SSatish Balay 
2170133cdb44SSatish Balay 
21710ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2172f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
21730ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
21740ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
21750ac07820SSatish Balay   ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
21760ac07820SSatish Balay   if (oldmat->colmap) {
21770ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
21780ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
21790ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
21800ac07820SSatish Balay   } else a->colmap = 0;
21810ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
21820ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
21830ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
21840ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
21850ac07820SSatish Balay   } else a->garray = 0;
21860ac07820SSatish Balay 
21870ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
21880ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
21890ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2190*e18c124aSSatish Balay 
21910ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
21922e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
21930ac07820SSatish Balay   PLogObjectParent(mat,a->A);
21942e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
21950ac07820SSatish Balay   PLogObjectParent(mat,a->B);
21960ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2197*e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
21980ac07820SSatish Balay   if (flg) {
21990ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
22000ac07820SSatish Balay   }
22010ac07820SSatish Balay   *newmat = mat;
22023a40ed3dSBarry Smith   PetscFunctionReturn(0);
22030ac07820SSatish Balay }
220457b952d6SSatish Balay 
220557b952d6SSatish Balay #include "sys.h"
220657b952d6SSatish Balay 
22075615d1e5SSatish Balay #undef __FUNC__
22085615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
220957b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
221057b952d6SSatish Balay {
221157b952d6SSatish Balay   Mat          A;
221257b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
221357b952d6SSatish Balay   Scalar       *vals,*buf;
221457b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
221557b952d6SSatish Balay   MPI_Status   status;
2216cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
221757b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
221840011551SBarry Smith   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
221957b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
222057b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
222157b952d6SSatish Balay 
2222d64ed03dSBarry Smith   PetscFunctionBegin;
222357b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
222457b952d6SSatish Balay 
222557b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
222657b952d6SSatish Balay   if (!rank) {
222757b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2228e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2229a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2230d64ed03dSBarry Smith     if (header[3] < 0) {
2231a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2232d64ed03dSBarry Smith     }
22336c5fab8fSBarry Smith   }
2234d64ed03dSBarry Smith 
2235ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
223657b952d6SSatish Balay   M = header[1]; N = header[2];
223757b952d6SSatish Balay 
2238a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
223957b952d6SSatish Balay 
224057b952d6SSatish Balay   /*
224157b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
224257b952d6SSatish Balay      divisible by the blocksize
224357b952d6SSatish Balay   */
224457b952d6SSatish Balay   Mbs        = M/bs;
224557b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
224657b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
224757b952d6SSatish Balay   else                  Mbs++;
224857b952d6SSatish Balay   if (extra_rows &&!rank) {
2249b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
225057b952d6SSatish Balay   }
2251537820f0SBarry Smith 
225257b952d6SSatish Balay   /* determine ownership of all rows */
225357b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
225457b952d6SSatish Balay   m   = mbs * bs;
2255cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2256cee3aa6bSSatish Balay   browners = rowners + size + 1;
2257ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
225857b952d6SSatish Balay   rowners[0] = 0;
2259cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2260cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
226157b952d6SSatish Balay   rstart = rowners[rank];
226257b952d6SSatish Balay   rend   = rowners[rank+1];
226357b952d6SSatish Balay 
226457b952d6SSatish Balay   /* distribute row lengths to all processors */
226557b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
226657b952d6SSatish Balay   if (!rank) {
226757b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2268e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
226957b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
227057b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2271cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2272ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
227357b952d6SSatish Balay     PetscFree(sndcounts);
2274d64ed03dSBarry Smith   } else {
2275ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
227657b952d6SSatish Balay   }
227757b952d6SSatish Balay 
227857b952d6SSatish Balay   if (!rank) {
227957b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
228057b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
228157b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
228257b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
228357b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
228457b952d6SSatish Balay         procsnz[i] += rowlengths[j];
228557b952d6SSatish Balay       }
228657b952d6SSatish Balay     }
228757b952d6SSatish Balay     PetscFree(rowlengths);
228857b952d6SSatish Balay 
228957b952d6SSatish Balay     /* determine max buffer needed and allocate it */
229057b952d6SSatish Balay     maxnz = 0;
229157b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
229257b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
229357b952d6SSatish Balay     }
229457b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
229557b952d6SSatish Balay 
229657b952d6SSatish Balay     /* read in my part of the matrix column indices  */
229757b952d6SSatish Balay     nz = procsnz[0];
229857b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
229957b952d6SSatish Balay     mycols = ibuf;
2300cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2301e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2302cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2303cee3aa6bSSatish Balay 
230457b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
230557b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
230657b952d6SSatish Balay       nz   = procsnz[i];
2307e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2308ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
230957b952d6SSatish Balay     }
231057b952d6SSatish Balay     /* read in the stuff for the last proc */
231157b952d6SSatish Balay     if ( size != 1 ) {
231257b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2313e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
231457b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2315ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
231657b952d6SSatish Balay     }
231757b952d6SSatish Balay     PetscFree(cols);
2318d64ed03dSBarry Smith   } else {
231957b952d6SSatish Balay     /* determine buffer space needed for message */
232057b952d6SSatish Balay     nz = 0;
232157b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
232257b952d6SSatish Balay       nz += locrowlens[i];
232357b952d6SSatish Balay     }
232457b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
232557b952d6SSatish Balay     mycols = ibuf;
232657b952d6SSatish Balay     /* receive message of column indices*/
2327ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2328ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2329a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
233057b952d6SSatish Balay   }
233157b952d6SSatish Balay 
233257b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2333cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2334cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
233557b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2336cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
233757b952d6SSatish Balay   masked1 = mask    + Mbs;
233857b952d6SSatish Balay   masked2 = masked1 + Mbs;
233957b952d6SSatish Balay   rowcount = 0; nzcount = 0;
234057b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
234157b952d6SSatish Balay     dcount  = 0;
234257b952d6SSatish Balay     odcount = 0;
234357b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
234457b952d6SSatish Balay       kmax = locrowlens[rowcount];
234557b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
234657b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
234757b952d6SSatish Balay         if (!mask[tmp]) {
234857b952d6SSatish Balay           mask[tmp] = 1;
234957b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
235057b952d6SSatish Balay           else masked1[dcount++] = tmp;
235157b952d6SSatish Balay         }
235257b952d6SSatish Balay       }
235357b952d6SSatish Balay       rowcount++;
235457b952d6SSatish Balay     }
2355cee3aa6bSSatish Balay 
235657b952d6SSatish Balay     dlens[i]  = dcount;
235757b952d6SSatish Balay     odlens[i] = odcount;
2358cee3aa6bSSatish Balay 
235957b952d6SSatish Balay     /* zero out the mask elements we set */
236057b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
236157b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
236257b952d6SSatish Balay   }
2363cee3aa6bSSatish Balay 
236457b952d6SSatish Balay   /* create our matrix */
2365537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2366537820f0SBarry Smith          CHKERRQ(ierr);
236757b952d6SSatish Balay   A = *newmat;
23686d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
236957b952d6SSatish Balay 
237057b952d6SSatish Balay   if (!rank) {
237157b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
237257b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
237357b952d6SSatish Balay     nz = procsnz[0];
237457b952d6SSatish Balay     vals = buf;
2375cee3aa6bSSatish Balay     mycols = ibuf;
2376cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2377e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2378cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2379537820f0SBarry Smith 
238057b952d6SSatish Balay     /* insert into matrix */
238157b952d6SSatish Balay     jj      = rstart*bs;
238257b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
238357b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
238457b952d6SSatish Balay       mycols += locrowlens[i];
238557b952d6SSatish Balay       vals   += locrowlens[i];
238657b952d6SSatish Balay       jj++;
238757b952d6SSatish Balay     }
238857b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
238957b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
239057b952d6SSatish Balay       nz   = procsnz[i];
239157b952d6SSatish Balay       vals = buf;
2392e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2393ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
239457b952d6SSatish Balay     }
239557b952d6SSatish Balay     /* the last proc */
239657b952d6SSatish Balay     if ( size != 1 ){
239757b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2398cee3aa6bSSatish Balay       vals = buf;
2399e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
240057b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2401ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
240257b952d6SSatish Balay     }
240357b952d6SSatish Balay     PetscFree(procsnz);
2404d64ed03dSBarry Smith   } else {
240557b952d6SSatish Balay     /* receive numeric values */
240657b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
240757b952d6SSatish Balay 
240857b952d6SSatish Balay     /* receive message of values*/
240957b952d6SSatish Balay     vals   = buf;
2410cee3aa6bSSatish Balay     mycols = ibuf;
2411ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2412ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2413a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
241457b952d6SSatish Balay 
241557b952d6SSatish Balay     /* insert into matrix */
241657b952d6SSatish Balay     jj      = rstart*bs;
2417cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
241857b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
241957b952d6SSatish Balay       mycols += locrowlens[i];
242057b952d6SSatish Balay       vals   += locrowlens[i];
242157b952d6SSatish Balay       jj++;
242257b952d6SSatish Balay     }
242357b952d6SSatish Balay   }
242457b952d6SSatish Balay   PetscFree(locrowlens);
242557b952d6SSatish Balay   PetscFree(buf);
242657b952d6SSatish Balay   PetscFree(ibuf);
242757b952d6SSatish Balay   PetscFree(rowners);
242857b952d6SSatish Balay   PetscFree(dlens);
2429cee3aa6bSSatish Balay   PetscFree(mask);
24306d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
24316d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
24323a40ed3dSBarry Smith   PetscFunctionReturn(0);
243357b952d6SSatish Balay }
243457b952d6SSatish Balay 
243557b952d6SSatish Balay 
2436133cdb44SSatish Balay 
2437133cdb44SSatish Balay #undef __FUNC__
2438133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2439133cdb44SSatish Balay /*@
2440133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2441133cdb44SSatish Balay 
2442133cdb44SSatish Balay    Input Parameters:
2443133cdb44SSatish Balay .  mat  - the matrix
2444133cdb44SSatish Balay .  fact - factor
2445133cdb44SSatish Balay 
2446fee21e36SBarry Smith    Collective on Mat
2447fee21e36SBarry Smith 
2448133cdb44SSatish Balay   Notes:
2449133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2450133cdb44SSatish Balay 
2451133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2452133cdb44SSatish Balay 
2453133cdb44SSatish Balay .seealso: MatSetOption()
2454133cdb44SSatish Balay @*/
2455133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2456133cdb44SSatish Balay {
245725fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2458133cdb44SSatish Balay 
2459133cdb44SSatish Balay   PetscFunctionBegin;
2460133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
246125fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2462133cdb44SSatish Balay       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2463133cdb44SSatish Balay   }
2464133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*) mat->data;
2465133cdb44SSatish Balay   baij->ht_fact = fact;
2466133cdb44SSatish Balay   PetscFunctionReturn(0);
2467133cdb44SSatish Balay }
2468