xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ff2fd23678e82342cbd04bb0e286b55a1de873a7)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*ff2fd236SBarry Smith static char vcid[] = "$Id: mpibaij.c,v 1.160 1999/03/12 22:03:12 bsmith Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
117b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
183b2fbd54SBarry Smith 
19537820f0SBarry Smith /*
20537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2157b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2257b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2357b952d6SSatish Balay    length of colmap equals the global matrix length.
2457b952d6SSatish Balay */
255615d1e5SSatish Balay #undef __FUNC__
265615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2757b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2857b952d6SSatish Balay {
2957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3057b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
316dee3f7eSBarry Smith   int         nbs = B->nbs,i,bs=B->bs;
326dee3f7eSBarry Smith #if defined (USE_CTABLE)
336dee3f7eSBarry Smith   int         ierr;
346dee3f7eSBarry Smith #endif
3557b952d6SSatish Balay 
36d64ed03dSBarry Smith   PetscFunctionBegin;
3748e59246SSatish Balay #if defined (USE_CTABLE)
38fa46199cSSatish Balay   ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
3948e59246SSatish Balay   for ( i=0; i<nbs; i++ ){
4048e59246SSatish Balay     ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
4148e59246SSatish Balay   }
4248e59246SSatish Balay #else
43758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
4457b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4557b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
46928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4748e59246SSatish Balay #endif
483a40ed3dSBarry Smith   PetscFunctionReturn(0);
4957b952d6SSatish Balay }
5057b952d6SSatish Balay 
5180c1aa95SSatish Balay #define CHUNKSIZE  10
5280c1aa95SSatish Balay 
53f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
5480c1aa95SSatish Balay { \
5580c1aa95SSatish Balay  \
5680c1aa95SSatish Balay     brow = row/bs;  \
5780c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
58ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
5980c1aa95SSatish Balay       bcol = col/bs; \
6080c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
61ab26458aSBarry Smith       low = 0; high = nrow; \
62ab26458aSBarry Smith       while (high-low > 3) { \
63ab26458aSBarry Smith         t = (low+high)/2; \
64ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
65ab26458aSBarry Smith         else              low  = t; \
66ab26458aSBarry Smith       } \
67ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
6880c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
6980c1aa95SSatish Balay         if (rp[_i] == bcol) { \
7080c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
71eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
72eada6651SSatish Balay           else                    *bap  = value;  \
73ac7a638eSSatish Balay           goto a_noinsert; \
7480c1aa95SSatish Balay         } \
7580c1aa95SSatish Balay       } \
7689280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
77a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
7880c1aa95SSatish Balay       if (nrow >= rmax) { \
7980c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
8080c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
8180c1aa95SSatish Balay         Scalar *new_a; \
8280c1aa95SSatish Balay  \
83a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
8489280ab3SLois Curfman McInnes  \
8580c1aa95SSatish Balay         /* malloc new storage space */ \
8680c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
8780c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
8880c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
8980c1aa95SSatish Balay         new_i   = new_j + new_nz; \
9080c1aa95SSatish Balay  \
9180c1aa95SSatish Balay         /* copy over old data into new slots */ \
9280c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
9380c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
9480c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
9580c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
9680c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
9780c1aa95SSatish Balay                                                            len*sizeof(int)); \
9880c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
9980c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
10080c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
10180c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
10280c1aa95SSatish Balay         /* free up old matrix storage */ \
10380c1aa95SSatish Balay         PetscFree(a->a);  \
10480c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
10580c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
10680c1aa95SSatish Balay         a->singlemalloc = 1; \
10780c1aa95SSatish Balay  \
10880c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
109ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
11080c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
11180c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
11280c1aa95SSatish Balay         a->reallocs++; \
11380c1aa95SSatish Balay         a->nz++; \
11480c1aa95SSatish Balay       } \
11580c1aa95SSatish Balay       N = nrow++ - 1;  \
11680c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
11780c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
11880c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
11980c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
12080c1aa95SSatish Balay       } \
12180c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
12280c1aa95SSatish Balay       rp[_i]                      = bcol;  \
12380c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
124ac7a638eSSatish Balay       a_noinsert:; \
12580c1aa95SSatish Balay     ailen[brow] = nrow; \
12680c1aa95SSatish Balay }
12757b952d6SSatish Balay 
128ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
129ac7a638eSSatish Balay { \
130ac7a638eSSatish Balay  \
131ac7a638eSSatish Balay     brow = row/bs;  \
132ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
133ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
134ac7a638eSSatish Balay       bcol = col/bs; \
135ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
136ac7a638eSSatish Balay       low = 0; high = nrow; \
137ac7a638eSSatish Balay       while (high-low > 3) { \
138ac7a638eSSatish Balay         t = (low+high)/2; \
139ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
140ac7a638eSSatish Balay         else              low  = t; \
141ac7a638eSSatish Balay       } \
142ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
143ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
144ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
145ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
146ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
147ac7a638eSSatish Balay           else                    *bap  = value;  \
148ac7a638eSSatish Balay           goto b_noinsert; \
149ac7a638eSSatish Balay         } \
150ac7a638eSSatish Balay       } \
15189280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
152a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
153ac7a638eSSatish Balay       if (nrow >= rmax) { \
154ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15574c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
156ac7a638eSSatish Balay         Scalar *new_a; \
157ac7a638eSSatish Balay  \
158a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
15989280ab3SLois Curfman McInnes  \
160ac7a638eSSatish Balay         /* malloc new storage space */ \
16174c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
162ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
163ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
164ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
165ac7a638eSSatish Balay  \
166ac7a638eSSatish Balay         /* copy over old data into new slots */ \
167ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
16874c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
169ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
170ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
171ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
172ac7a638eSSatish Balay                                                            len*sizeof(int)); \
173ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
174ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
175ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
176ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
177ac7a638eSSatish Balay         /* free up old matrix storage */ \
17874c639caSSatish Balay         PetscFree(b->a);  \
17974c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
18074c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
18174c639caSSatish Balay         b->singlemalloc = 1; \
182ac7a638eSSatish Balay  \
183ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
184ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
18574c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
18674c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
18774c639caSSatish Balay         b->reallocs++; \
18874c639caSSatish Balay         b->nz++; \
189ac7a638eSSatish Balay       } \
190ac7a638eSSatish Balay       N = nrow++ - 1;  \
191ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
192ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
193ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
194ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
195ac7a638eSSatish Balay       } \
196ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
197ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
198ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
199ac7a638eSSatish Balay       b_noinsert:; \
200ac7a638eSSatish Balay     bilen[brow] = nrow; \
201ac7a638eSSatish Balay }
202ac7a638eSSatish Balay 
2035615d1e5SSatish Balay #undef __FUNC__
2045615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
205ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
20657b952d6SSatish Balay {
20757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
20857b952d6SSatish Balay   Scalar      value;
2094fa0d573SSatish Balay   int         ierr,i,j,row,col;
2104fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2114fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2124fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
21357b952d6SSatish Balay 
214eada6651SSatish Balay   /* Some Variables required in the macro */
21580c1aa95SSatish Balay   Mat         A = baij->A;
21680c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
217ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
218ac7a638eSSatish Balay   Scalar      *aa=a->a;
219ac7a638eSSatish Balay 
220ac7a638eSSatish Balay   Mat         B = baij->B;
221ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
222ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
223ac7a638eSSatish Balay   Scalar      *ba=b->a;
224ac7a638eSSatish Balay 
225ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
226ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
227ac7a638eSSatish Balay   Scalar      *ap,*bap;
22880c1aa95SSatish Balay 
229d64ed03dSBarry Smith   PetscFunctionBegin;
23057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2315ef9f2a5SBarry Smith     if (im[i] < 0) continue;
2323a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
233a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
234639f9d9dSBarry Smith #endif
23557b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
23657b952d6SSatish Balay       row = im[i] - rstart_orig;
23757b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
23857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
23957b952d6SSatish Balay           col = in[j] - cstart_orig;
24057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
241f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
24280c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
24373959e64SBarry Smith         } else if (in[j] < 0) continue;
2443a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
245a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
246639f9d9dSBarry Smith #endif
24757b952d6SSatish Balay         else {
24857b952d6SSatish Balay           if (mat->was_assembled) {
249905e6a2fSBarry Smith             if (!baij->colmap) {
250905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
251905e6a2fSBarry Smith             }
25248e59246SSatish Balay #if defined (USE_CTABLE)
253fa46199cSSatish Balay             ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
254fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
25548e59246SSatish Balay #else
256905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
25748e59246SSatish Balay #endif
25857b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
25957b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
26057b952d6SSatish Balay               col =  in[j];
2619bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2629bf004c3SSatish Balay               B = baij->B;
2639bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2649bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2659bf004c3SSatish Balay               ba=b->a;
26657b952d6SSatish Balay             }
267d64ed03dSBarry Smith           } else col = in[j];
26857b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
269ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
270ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
27157b952d6SSatish Balay         }
27257b952d6SSatish Balay       }
273d64ed03dSBarry Smith     } else {
27490f02eecSBarry Smith       if ( !baij->donotstash) {
275*ff2fd236SBarry Smith         if (roworiented) {
276*ff2fd236SBarry Smith           ierr = StashValuesRoworiented_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
277*ff2fd236SBarry Smith         } else {
278*ff2fd236SBarry Smith           ierr = StashValuesColumnoriented_Private(&baij->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
27957b952d6SSatish Balay         }
28057b952d6SSatish Balay       }
28157b952d6SSatish Balay     }
28290f02eecSBarry Smith   }
2833a40ed3dSBarry Smith   PetscFunctionReturn(0);
28457b952d6SSatish Balay }
28557b952d6SSatish Balay 
286ab26458aSBarry Smith #undef __FUNC__
287ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
288ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
289ab26458aSBarry Smith {
290ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
29130793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
2927ef1d9bdSSatish Balay   int         ierr,i,j,ii,jj,row,col;
293ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
294ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
295ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
296ab26458aSBarry Smith 
29730793edcSSatish Balay   if(!barray) {
29847513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
29930793edcSSatish Balay   }
30030793edcSSatish Balay 
301ab26458aSBarry Smith   if (roworiented) {
302ab26458aSBarry Smith     stepval = (n-1)*bs;
303ab26458aSBarry Smith   } else {
304ab26458aSBarry Smith     stepval = (m-1)*bs;
305ab26458aSBarry Smith   }
306ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
3075ef9f2a5SBarry Smith     if (im[i] < 0) continue;
3083a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
3095ef9f2a5SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
310ab26458aSBarry Smith #endif
311ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
312ab26458aSBarry Smith       row = im[i] - rstart;
313ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
31415b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
31515b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
31615b57d14SSatish Balay           barray = v + i*bs2;
31715b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
31815b57d14SSatish Balay           barray = v + j*bs2;
31915b57d14SSatish Balay         } else { /* Here a copy is required */
320ab26458aSBarry Smith           if (roworiented) {
321ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
322ab26458aSBarry Smith           } else {
323ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
324abef11f7SSatish Balay           }
32547513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
32647513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
32730793edcSSatish Balay               *barray++  = *value++;
32847513183SBarry Smith             }
32947513183SBarry Smith           }
33030793edcSSatish Balay           barray -=bs2;
33115b57d14SSatish Balay         }
332abef11f7SSatish Balay 
333abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
334abef11f7SSatish Balay           col  = in[j] - cstart;
33530793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
336ab26458aSBarry Smith         }
3375ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
3383a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
3395ef9f2a5SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
340ab26458aSBarry Smith #endif
341ab26458aSBarry Smith         else {
342ab26458aSBarry Smith           if (mat->was_assembled) {
343ab26458aSBarry Smith             if (!baij->colmap) {
344ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
345ab26458aSBarry Smith             }
346a5eb4965SSatish Balay 
3473a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
34848e59246SSatish Balay #if defined (USE_CTABLE)
349fa46199cSSatish Balay             { int data;
350fa46199cSSatish Balay             ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
351fa46199cSSatish Balay             if((data - 1) % bs)
35248e59246SSatish Balay 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
353fa46199cSSatish Balay             }
35448e59246SSatish Balay #else
355a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
356a5eb4965SSatish Balay #endif
35748e59246SSatish Balay #endif
35848e59246SSatish Balay #if defined (USE_CTABLE)
359fa46199cSSatish Balay 	    ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
360fa46199cSSatish Balay             col  = (col - 1)/bs;
36148e59246SSatish Balay #else
362a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
36348e59246SSatish Balay #endif
364ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
365ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
366ab26458aSBarry Smith               col =  in[j];
367ab26458aSBarry Smith             }
368ab26458aSBarry Smith           }
369ab26458aSBarry Smith           else col = in[j];
37030793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
371ab26458aSBarry Smith         }
372ab26458aSBarry Smith       }
373d64ed03dSBarry Smith     } else {
374ab26458aSBarry Smith       if (!baij->donotstash) {
375*ff2fd236SBarry Smith         if (roworiented) {
376*ff2fd236SBarry Smith           ierr = StashValuesRoworientedBlocked_Private(
377*ff2fd236SBarry Smith                     &baij->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
378*ff2fd236SBarry Smith         } else {
379*ff2fd236SBarry Smith           ierr = StashValuesColumnorientedBlocked_Private(
380*ff2fd236SBarry Smith                     &baij->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
381*ff2fd236SBarry Smith         }
382abef11f7SSatish Balay       }
383ab26458aSBarry Smith     }
384ab26458aSBarry Smith   }
3853a40ed3dSBarry Smith   PetscFunctionReturn(0);
386ab26458aSBarry Smith }
3870bdbc534SSatish Balay #define HASH_KEY 0.6180339887
388c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
389c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
390c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
3915615d1e5SSatish Balay #undef __FUNC__
3920bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
3930bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
3940bdbc534SSatish Balay {
3950bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3960bdbc534SSatish Balay   int         ierr,i,j,row,col;
3970bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
398c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
399c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
400c2760754SSatish Balay   double      tmp;
401b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,value;
4024a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
4034a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4044a15367fSSatish Balay #endif
4050bdbc534SSatish Balay 
4060bdbc534SSatish Balay   PetscFunctionBegin;
4070bdbc534SSatish Balay 
4080bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
4090bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
4100bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4110bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4120bdbc534SSatish Balay #endif
4130bdbc534SSatish Balay       row = im[i];
414c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4150bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4160bdbc534SSatish Balay         col = in[j];
4170bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4180bdbc534SSatish Balay         /* Look up into the Hash Table */
419c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
420c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4210bdbc534SSatish Balay 
422c2760754SSatish Balay 
423c2760754SSatish Balay         idx = h1;
424187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
425187ce0cbSSatish Balay         insert_ct++;
426187ce0cbSSatish Balay         total_ct++;
427187ce0cbSSatish Balay         if (HT[idx] != key) {
428187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
429187ce0cbSSatish Balay           if (idx == size) {
430187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
431187ce0cbSSatish Balay             if (idx == h1) {
432187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
433187ce0cbSSatish Balay             }
434187ce0cbSSatish Balay           }
435187ce0cbSSatish Balay         }
436187ce0cbSSatish Balay #else
437c2760754SSatish Balay         if (HT[idx] != key) {
438c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
439c2760754SSatish Balay           if (idx == size) {
440c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
441c2760754SSatish Balay             if (idx == h1) {
442c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
443c2760754SSatish Balay             }
444c2760754SSatish Balay           }
445c2760754SSatish Balay         }
446187ce0cbSSatish Balay #endif
447c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
448c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
449c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4500bdbc534SSatish Balay       }
4510bdbc534SSatish Balay     } else {
4520bdbc534SSatish Balay       if (!baij->donotstash) {
453*ff2fd236SBarry Smith         if (roworiented) {
454*ff2fd236SBarry Smith           ierr = StashValuesRoworiented_Private(&baij->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
455*ff2fd236SBarry Smith         } else {
456*ff2fd236SBarry Smith           ierr = StashValuesColumnoriented_Private(&baij->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4570bdbc534SSatish Balay         }
4580bdbc534SSatish Balay       }
4590bdbc534SSatish Balay     }
4600bdbc534SSatish Balay   }
461187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
462187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
463187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
464187ce0cbSSatish Balay #endif
4650bdbc534SSatish Balay   PetscFunctionReturn(0);
4660bdbc534SSatish Balay }
4670bdbc534SSatish Balay 
4680bdbc534SSatish Balay #undef __FUNC__
4690bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4700bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4710bdbc534SSatish Balay {
4720bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4730bdbc534SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
4740bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
475b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
476c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
477c2760754SSatish Balay   double      tmp;
478187ce0cbSSatish Balay   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
4794a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
4804a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4814a15367fSSatish Balay #endif
4820bdbc534SSatish Balay 
483d0a41580SSatish Balay   PetscFunctionBegin;
484d0a41580SSatish Balay 
4850bdbc534SSatish Balay   if (roworiented) {
4860bdbc534SSatish Balay     stepval = (n-1)*bs;
4870bdbc534SSatish Balay   } else {
4880bdbc534SSatish Balay     stepval = (m-1)*bs;
4890bdbc534SSatish Balay   }
4900bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
4910bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
4920bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4930bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4940bdbc534SSatish Balay #endif
4950bdbc534SSatish Balay     row   = im[i];
496187ce0cbSSatish Balay     v_t   = v + i*bs2;
497c2760754SSatish Balay     if (row >= rstart && row < rend) {
4980bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4990bdbc534SSatish Balay         col = in[j];
5000bdbc534SSatish Balay 
5010bdbc534SSatish Balay         /* Look up into the Hash Table */
502c2760754SSatish Balay         key = row*Nbs+col+1;
503c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5040bdbc534SSatish Balay 
505c2760754SSatish Balay         idx = h1;
506187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
507187ce0cbSSatish Balay         total_ct++;
508187ce0cbSSatish Balay         insert_ct++;
509187ce0cbSSatish Balay        if (HT[idx] != key) {
510187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
511187ce0cbSSatish Balay           if (idx == size) {
512187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
513187ce0cbSSatish Balay             if (idx == h1) {
514187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
515187ce0cbSSatish Balay             }
516187ce0cbSSatish Balay           }
517187ce0cbSSatish Balay         }
518187ce0cbSSatish Balay #else
519c2760754SSatish Balay         if (HT[idx] != key) {
520c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
521c2760754SSatish Balay           if (idx == size) {
522c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
523c2760754SSatish Balay             if (idx == h1) {
524c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
525c2760754SSatish Balay             }
526c2760754SSatish Balay           }
527c2760754SSatish Balay         }
528187ce0cbSSatish Balay #endif
529c2760754SSatish Balay         baij_a = HD[idx];
5300bdbc534SSatish Balay         if (roworiented) {
531c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
532187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
533187ce0cbSSatish Balay           value = v_t;
534187ce0cbSSatish Balay           v_t  += bs;
535fef45726SSatish Balay           if (addv == ADD_VALUES) {
536c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
537c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
538fef45726SSatish Balay                 baij_a[jj]  += *value++;
539b4cc0f5aSSatish Balay               }
540b4cc0f5aSSatish Balay             }
541fef45726SSatish Balay           } else {
542c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
543c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
544fef45726SSatish Balay                 baij_a[jj]  = *value++;
545fef45726SSatish Balay               }
546fef45726SSatish Balay             }
547fef45726SSatish Balay           }
5480bdbc534SSatish Balay         } else {
5490bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
550fef45726SSatish Balay           if (addv == ADD_VALUES) {
551b4cc0f5aSSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
5520bdbc534SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
553fef45726SSatish Balay                 baij_a[jj]  += *value++;
554fef45726SSatish Balay               }
555fef45726SSatish Balay             }
556fef45726SSatish Balay           } else {
557fef45726SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
558fef45726SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
559fef45726SSatish Balay                 baij_a[jj]  = *value++;
560fef45726SSatish Balay               }
561b4cc0f5aSSatish Balay             }
5620bdbc534SSatish Balay           }
5630bdbc534SSatish Balay         }
5640bdbc534SSatish Balay       }
5650bdbc534SSatish Balay     } else {
5660bdbc534SSatish Balay       if (!baij->donotstash) {
5670bdbc534SSatish Balay         if (roworiented ) {
5680bdbc534SSatish Balay           row   = im[i]*bs;
5690bdbc534SSatish Balay           value = v + i*(stepval+bs)*bs;
5700bdbc534SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
5710bdbc534SSatish Balay             for ( k=0; k<n; k++ ) {
5720bdbc534SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
573*ff2fd236SBarry Smith                 ierr = StashValuesRoworiented_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
5740bdbc534SSatish Balay               }
5750bdbc534SSatish Balay             }
5760bdbc534SSatish Balay           }
5770bdbc534SSatish Balay         } else {
5780bdbc534SSatish Balay           for ( j=0; j<n; j++ ) {
5790bdbc534SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
5800bdbc534SSatish Balay             col   = in[j]*bs;
5810bdbc534SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
5820bdbc534SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
583*ff2fd236SBarry Smith                 ierr = StashValuesRoworiented_Private(&baij->stash,row,1,&col,value++);CHKERRQ(ierr);
5840bdbc534SSatish Balay               }
5850bdbc534SSatish Balay             }
5860bdbc534SSatish Balay           }
5870bdbc534SSatish Balay         }
5880bdbc534SSatish Balay       }
5890bdbc534SSatish Balay     }
5900bdbc534SSatish Balay   }
591187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
592187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
593187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
594187ce0cbSSatish Balay #endif
5950bdbc534SSatish Balay   PetscFunctionReturn(0);
5960bdbc534SSatish Balay }
597133cdb44SSatish Balay 
5980bdbc534SSatish Balay #undef __FUNC__
5995615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
600ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
601d6de1c52SSatish Balay {
602d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
603d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
60448e59246SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
605d6de1c52SSatish Balay 
606133cdb44SSatish Balay   PetscFunctionBegin;
607d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
608a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
609a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
610d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
611d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
612d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
613a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
614a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
615d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
616d6de1c52SSatish Balay           col = idxn[j] - bscstart;
61798dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
618d64ed03dSBarry Smith         } else {
619905e6a2fSBarry Smith           if (!baij->colmap) {
620905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
621905e6a2fSBarry Smith           }
62248e59246SSatish Balay #if defined (USE_CTABLE)
623fa46199cSSatish Balay           ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
624fa46199cSSatish Balay           data --;
62548e59246SSatish Balay #else
62648e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
62748e59246SSatish Balay #endif
62848e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
629d9d09a02SSatish Balay           else {
63048e59246SSatish Balay             col  = data + idxn[j]%bs;
63198dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
632d6de1c52SSatish Balay           }
633d6de1c52SSatish Balay         }
634d6de1c52SSatish Balay       }
635d64ed03dSBarry Smith     } else {
636a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
637d6de1c52SSatish Balay     }
638d6de1c52SSatish Balay   }
6393a40ed3dSBarry Smith  PetscFunctionReturn(0);
640d6de1c52SSatish Balay }
641d6de1c52SSatish Balay 
6425615d1e5SSatish Balay #undef __FUNC__
6435615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
644ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
645d6de1c52SSatish Balay {
646d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
647d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
648acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
649d6de1c52SSatish Balay   double     sum = 0.0;
650d6de1c52SSatish Balay   Scalar     *v;
651d6de1c52SSatish Balay 
652d64ed03dSBarry Smith   PetscFunctionBegin;
653d6de1c52SSatish Balay   if (baij->size == 1) {
654d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
655d6de1c52SSatish Balay   } else {
656d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
657d6de1c52SSatish Balay       v = amat->a;
658d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
6593a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
660e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
661d6de1c52SSatish Balay #else
662d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
663d6de1c52SSatish Balay #endif
664d6de1c52SSatish Balay       }
665d6de1c52SSatish Balay       v = bmat->a;
666d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
6673a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
668e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
669d6de1c52SSatish Balay #else
670d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
671d6de1c52SSatish Balay #endif
672d6de1c52SSatish Balay       }
673ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
674d6de1c52SSatish Balay       *norm = sqrt(*norm);
675d64ed03dSBarry Smith     } else {
676e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
677d6de1c52SSatish Balay     }
678d64ed03dSBarry Smith   }
6793a40ed3dSBarry Smith   PetscFunctionReturn(0);
680d6de1c52SSatish Balay }
68157b952d6SSatish Balay 
682bd7f49f5SSatish Balay 
683fef45726SSatish Balay /*
684fef45726SSatish Balay   Creates the hash table, and sets the table
685fef45726SSatish Balay   This table is created only once.
686fef45726SSatish Balay   If new entried need to be added to the matrix
687fef45726SSatish Balay   then the hash table has to be destroyed and
688fef45726SSatish Balay   recreated.
689fef45726SSatish Balay */
690fef45726SSatish Balay #undef __FUNC__
691fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
692d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
693596b8d2eSBarry Smith {
694596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
695596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
696596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
6970bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
6984a15367fSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart;
699187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
700fef45726SSatish Balay   int         *HT,key;
7010bdbc534SSatish Balay   Scalar      **HD;
702c2760754SSatish Balay   double      tmp;
7034a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
7044a15367fSSatish Balay   int         ct=0,max=0;
7054a15367fSSatish Balay #endif
706fef45726SSatish Balay 
707d64ed03dSBarry Smith   PetscFunctionBegin;
7080bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7090bdbc534SSatish Balay   size = baij->ht_size;
710fef45726SSatish Balay 
7110bdbc534SSatish Balay   if (baij->ht) {
7120bdbc534SSatish Balay     PetscFunctionReturn(0);
713596b8d2eSBarry Smith   }
7140bdbc534SSatish Balay 
715fef45726SSatish Balay   /* Allocate Memory for Hash Table */
716b9e4cc15SSatish Balay   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
717b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
718b9e4cc15SSatish Balay   HD = baij->hd;
719a07cd24cSSatish Balay   HT = baij->ht;
720b9e4cc15SSatish Balay 
721b9e4cc15SSatish Balay 
722c2760754SSatish Balay   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
7230bdbc534SSatish Balay 
724596b8d2eSBarry Smith 
725596b8d2eSBarry Smith   /* Loop Over A */
7260bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
727596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
7280bdbc534SSatish Balay       row = i+rstart;
7290bdbc534SSatish Balay       col = aj[j]+cstart;
730596b8d2eSBarry Smith 
731187ce0cbSSatish Balay       key = row*Nbs + col + 1;
732c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7330bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7340bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7350bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7360bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
737596b8d2eSBarry Smith           break;
738187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
739187ce0cbSSatish Balay         } else {
740187ce0cbSSatish Balay           ct++;
741187ce0cbSSatish Balay #endif
742596b8d2eSBarry Smith         }
743187ce0cbSSatish Balay       }
744187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
745187ce0cbSSatish Balay       if (k> max) max = k;
746187ce0cbSSatish Balay #endif
747596b8d2eSBarry Smith     }
748596b8d2eSBarry Smith   }
749596b8d2eSBarry Smith   /* Loop Over B */
7500bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
751596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
7520bdbc534SSatish Balay       row = i+rstart;
7530bdbc534SSatish Balay       col = garray[bj[j]];
754187ce0cbSSatish Balay       key = row*Nbs + col + 1;
755c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7560bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7570bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7580bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7590bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
760596b8d2eSBarry Smith           break;
761187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
762187ce0cbSSatish Balay         } else {
763187ce0cbSSatish Balay           ct++;
764187ce0cbSSatish Balay #endif
765596b8d2eSBarry Smith         }
766187ce0cbSSatish Balay       }
767187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
768187ce0cbSSatish Balay       if (k> max) max = k;
769187ce0cbSSatish Balay #endif
770596b8d2eSBarry Smith     }
771596b8d2eSBarry Smith   }
772596b8d2eSBarry Smith 
773596b8d2eSBarry Smith   /* Print Summary */
774187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
775c2760754SSatish Balay   for ( i=0,j=0; i<size; i++)
776596b8d2eSBarry Smith     if (HT[i]) {j++;}
777187ce0cbSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
778187ce0cbSSatish Balay            (j== 0)? 0.0:((double)(ct+j))/j,max);
779187ce0cbSSatish Balay #endif
7803a40ed3dSBarry Smith   PetscFunctionReturn(0);
781596b8d2eSBarry Smith }
78257b952d6SSatish Balay 
783bbb85fb3SSatish Balay #undef __FUNC__
784bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
785bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
786bbb85fb3SSatish Balay {
787bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
788*ff2fd236SBarry Smith   int         ierr,nstash,reallocs;
789bbb85fb3SSatish Balay   InsertMode  addv;
790bbb85fb3SSatish Balay 
791bbb85fb3SSatish Balay   PetscFunctionBegin;
792bbb85fb3SSatish Balay   if (baij->donotstash) {
793bbb85fb3SSatish Balay     PetscFunctionReturn(0);
794bbb85fb3SSatish Balay   }
795bbb85fb3SSatish Balay 
796bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
797bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
798bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
799bbb85fb3SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
800bbb85fb3SSatish Balay   }
801bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
802bbb85fb3SSatish Balay 
803*ff2fd236SBarry Smith   ierr =  StashScatterBegin_Private(&baij->stash,baij->rowners_bs); CHKERRQ(ierr);
804a2d1c673SSatish Balay   ierr =  StashScatterBegin_Private(&baij->bstash,baij->rowners); CHKERRQ(ierr);
805*ff2fd236SBarry Smith   ierr = StashGetInfo(&baij->stash,&nstash,&reallocs); CHKERRQ(ierr);
806*ff2fd236SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",
807*ff2fd236SBarry Smith            nstash,reallocs);
808*ff2fd236SBarry Smith   ierr = StashGetInfo(&baij->stash,&nstash,&reallocs); CHKERRQ(ierr);
809*ff2fd236SBarry Smith   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",
810*ff2fd236SBarry Smith            nstash,reallocs);
811bbb85fb3SSatish Balay   PetscFunctionReturn(0);
812bbb85fb3SSatish Balay }
813bbb85fb3SSatish Balay 
814bbb85fb3SSatish Balay #undef __FUNC__
815bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
816bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
817bbb85fb3SSatish Balay {
818bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ *) mat->data;
819a2d1c673SSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
820a2d1c673SSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
821a2d1c673SSatish Balay   int         *row,*col,other_disassembled,r1,r2,r3;
822bbb85fb3SSatish Balay   Scalar      *val;
823bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
824bbb85fb3SSatish Balay 
825bbb85fb3SSatish Balay   PetscFunctionBegin;
826bbb85fb3SSatish Balay   if (!baij->donotstash) {
827a2d1c673SSatish Balay     while (1) {
828a2d1c673SSatish Balay       ierr = StashScatterGetMesg_Private(&baij->stash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
829a2d1c673SSatish Balay       if (!flg) break;
830a2d1c673SSatish Balay 
831bbb85fb3SSatish Balay       for ( i=0; i<n; ) {
832bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
833bbb85fb3SSatish Balay         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
834bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
835bbb85fb3SSatish Balay         else       ncols = n-i;
836bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
837bbb85fb3SSatish Balay         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
838bbb85fb3SSatish Balay         i = j;
839bbb85fb3SSatish Balay       }
840bbb85fb3SSatish Balay     }
841a2d1c673SSatish Balay     ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr);
842a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
843a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
844a2d1c673SSatish Balay        restore the original flags */
845a2d1c673SSatish Balay     r1 = baij->roworiented;
846a2d1c673SSatish Balay     r2 = a->roworiented;
847a2d1c673SSatish Balay     r3 = b->roworiented;
848a2d1c673SSatish Balay     baij->roworiented = 0;
849a2d1c673SSatish Balay     a->roworiented    = 0;
850a2d1c673SSatish Balay     b->roworiented    = 0;
851a2d1c673SSatish Balay     while (1) {
852a2d1c673SSatish Balay       ierr = StashScatterGetMesg_Private(&baij->bstash,&n,&row,&col,&val,&flg); CHKERRQ(ierr);
853a2d1c673SSatish Balay       if (!flg) break;
854a2d1c673SSatish Balay 
855a2d1c673SSatish Balay       for ( i=0; i<n; ) {
856a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
857a2d1c673SSatish Balay         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
858a2d1c673SSatish Balay         if (j < n) ncols = j-i;
859a2d1c673SSatish Balay         else       ncols = n-i;
860a2d1c673SSatish Balay         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv); CHKERRQ(ierr);
861a2d1c673SSatish Balay         i = j;
862a2d1c673SSatish Balay       }
863a2d1c673SSatish Balay     }
864a2d1c673SSatish Balay     ierr = StashScatterEnd_Private(&baij->bstash); CHKERRQ(ierr);
865a2d1c673SSatish Balay     baij->roworiented = r1;
866a2d1c673SSatish Balay     a->roworiented    = r2;
867a2d1c673SSatish Balay     b->roworiented    = r3;
868bbb85fb3SSatish Balay   }
869bbb85fb3SSatish Balay 
870bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
871bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
872bbb85fb3SSatish Balay 
873bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
874bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
875bbb85fb3SSatish Balay   /*
876bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
877bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
878bbb85fb3SSatish Balay   */
879bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
880bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
881bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
882bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
883bbb85fb3SSatish Balay     }
884bbb85fb3SSatish Balay   }
885bbb85fb3SSatish Balay 
886bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
887bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
888bbb85fb3SSatish Balay   }
889bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
890bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
891bbb85fb3SSatish Balay 
892bbb85fb3SSatish Balay #if defined(USE_PETSC_BOPT_g)
893bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
894bbb85fb3SSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
895bbb85fb3SSatish Balay              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
896bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
897bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
898bbb85fb3SSatish Balay   }
899bbb85fb3SSatish Balay #endif
900bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
901bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
902bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
903bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
904bbb85fb3SSatish Balay   }
905bbb85fb3SSatish Balay 
906bbb85fb3SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
907bbb85fb3SSatish Balay   PetscFunctionReturn(0);
908bbb85fb3SSatish Balay }
90957b952d6SSatish Balay 
9105615d1e5SSatish Balay #undef __FUNC__
9115615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
91257b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
91357b952d6SSatish Balay {
91457b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
91557b952d6SSatish Balay   int          ierr;
91657b952d6SSatish Balay 
917d64ed03dSBarry Smith   PetscFunctionBegin;
91857b952d6SSatish Balay   if (baij->size == 1) {
91957b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
920a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
9213a40ed3dSBarry Smith   PetscFunctionReturn(0);
92257b952d6SSatish Balay }
92357b952d6SSatish Balay 
9245615d1e5SSatish Balay #undef __FUNC__
9257b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
9267b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
92757b952d6SSatish Balay {
92857b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
92977ed5343SBarry Smith   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
93057b952d6SSatish Balay   FILE         *fd;
93157b952d6SSatish Balay   ViewerType   vtype;
93257b952d6SSatish Balay 
933d64ed03dSBarry Smith   PetscFunctionBegin;
93457b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
9353f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
936d41123aaSBarry Smith     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
937639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
9384e220ebcSLois Curfman McInnes       MatInfo info;
93957b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
94057b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
941d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
94257b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
94357b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
9444e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
9454e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
9464e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
9474e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
9484e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
9494e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
95057b952d6SSatish Balay       fflush(fd);
95157b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
95257b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
9533a40ed3dSBarry Smith       PetscFunctionReturn(0);
954d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
955bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
9563a40ed3dSBarry Smith       PetscFunctionReturn(0);
95757b952d6SSatish Balay     }
95857b952d6SSatish Balay   }
95957b952d6SSatish Balay 
9603f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
96157b952d6SSatish Balay     Draw       draw;
96257b952d6SSatish Balay     PetscTruth isnull;
96377ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
9643a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
96557b952d6SSatish Balay   }
96657b952d6SSatish Balay 
96757b952d6SSatish Balay   if (size == 1) {
96857b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
969d64ed03dSBarry Smith   } else {
97057b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
97157b952d6SSatish Balay     Mat         A;
97257b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
97340011551SBarry Smith     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
97457b952d6SSatish Balay     int         mbs = baij->mbs;
97557b952d6SSatish Balay     Scalar      *a;
97657b952d6SSatish Balay 
97757b952d6SSatish Balay     if (!rank) {
97855843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
979d64ed03dSBarry Smith     } else {
98055843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
98157b952d6SSatish Balay     }
98257b952d6SSatish Balay     PLogObjectParent(mat,A);
98357b952d6SSatish Balay 
98457b952d6SSatish Balay     /* copy over the A part */
98557b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->A->data;
98657b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
98757b952d6SSatish Balay     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
98857b952d6SSatish Balay 
98957b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
99057b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
99157b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
99257b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
99357b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
99457b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
995cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
996cee3aa6bSSatish Balay           col++; a += bs;
99757b952d6SSatish Balay         }
99857b952d6SSatish Balay       }
99957b952d6SSatish Balay     }
100057b952d6SSatish Balay     /* copy over the B part */
100157b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->B->data;
100257b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
100357b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
100457b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
100557b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
100657b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
100757b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
100857b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1009cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1010cee3aa6bSSatish Balay           col++; a += bs;
101157b952d6SSatish Balay         }
101257b952d6SSatish Balay       }
101357b952d6SSatish Balay     }
101457b952d6SSatish Balay     PetscFree(rvals);
10156d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
10166d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
101755843e3eSBarry Smith     /*
101855843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
101955843e3eSBarry Smith        synchronized across all processors that share the Draw object
102055843e3eSBarry Smith     */
10213f1db9ecSBarry Smith     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
102257b952d6SSatish Balay       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
102357b952d6SSatish Balay     }
102457b952d6SSatish Balay     ierr = MatDestroy(A); CHKERRQ(ierr);
102557b952d6SSatish Balay   }
10263a40ed3dSBarry Smith   PetscFunctionReturn(0);
102757b952d6SSatish Balay }
102857b952d6SSatish Balay 
102957b952d6SSatish Balay 
103057b952d6SSatish Balay 
10315615d1e5SSatish Balay #undef __FUNC__
10325615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
1033e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
103457b952d6SSatish Balay {
103557b952d6SSatish Balay   int         ierr;
103657b952d6SSatish Balay   ViewerType  vtype;
103757b952d6SSatish Balay 
1038d64ed03dSBarry Smith   PetscFunctionBegin;
103957b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
10403f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
10417b2a1423SBarry Smith       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
10427b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
10433f1db9ecSBarry Smith   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
10443a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
10455cd90555SBarry Smith   } else {
10465cd90555SBarry Smith     SETERRQ(1,1,"Viewer type not supported by PETSc object");
104757b952d6SSatish Balay   }
10483a40ed3dSBarry Smith   PetscFunctionReturn(0);
104957b952d6SSatish Balay }
105057b952d6SSatish Balay 
10515615d1e5SSatish Balay #undef __FUNC__
10525615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1053e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
105479bdfe76SSatish Balay {
105579bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
105679bdfe76SSatish Balay   int         ierr;
105779bdfe76SSatish Balay 
1058d64ed03dSBarry Smith   PetscFunctionBegin;
105998dd23e9SBarry Smith   if (--mat->refct > 0) PetscFunctionReturn(0);
106098dd23e9SBarry Smith 
106198dd23e9SBarry Smith   if (mat->mapping) {
106298dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
106398dd23e9SBarry Smith   }
106498dd23e9SBarry Smith   if (mat->bmapping) {
106598dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
106698dd23e9SBarry Smith   }
106761b13de0SBarry Smith   if (mat->rmap) {
106861b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
106961b13de0SBarry Smith   }
107061b13de0SBarry Smith   if (mat->cmap) {
107161b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
107261b13de0SBarry Smith   }
10733a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
1074e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
107579bdfe76SSatish Balay #endif
107679bdfe76SSatish Balay 
107783e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
1078a2d1c673SSatish Balay   ierr = StashDestroy_Private(&baij->bstash); CHKERRQ(ierr);
107979bdfe76SSatish Balay   PetscFree(baij->rowners);
108079bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
108179bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
108248e59246SSatish Balay #if defined (USE_CTABLE)
108348e59246SSatish Balay   if (baij->colmap) TableDelete(baij->colmap);
108448e59246SSatish Balay #else
108579bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
108648e59246SSatish Balay #endif
108779bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
108879bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
108979bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
109079bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
109130793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
1092b9e4cc15SSatish Balay   if (baij->hd) PetscFree(baij->hd);
109379bdfe76SSatish Balay   PetscFree(baij);
109479bdfe76SSatish Balay   PLogObjectDestroy(mat);
109579bdfe76SSatish Balay   PetscHeaderDestroy(mat);
10963a40ed3dSBarry Smith   PetscFunctionReturn(0);
109779bdfe76SSatish Balay }
109879bdfe76SSatish Balay 
10995615d1e5SSatish Balay #undef __FUNC__
11005615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1101ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1102cee3aa6bSSatish Balay {
1103cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
110447b4a8eaSLois Curfman McInnes   int         ierr, nt;
1105cee3aa6bSSatish Balay 
1106d64ed03dSBarry Smith   PetscFunctionBegin;
1107e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
110847b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1109a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
111047b4a8eaSLois Curfman McInnes   }
1111e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
111247b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1113a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
111447b4a8eaSLois Curfman McInnes   }
111543a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1116f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
111743a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1118f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
111943a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
11203a40ed3dSBarry Smith   PetscFunctionReturn(0);
1121cee3aa6bSSatish Balay }
1122cee3aa6bSSatish Balay 
11235615d1e5SSatish Balay #undef __FUNC__
11245615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1125ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1126cee3aa6bSSatish Balay {
1127cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1128cee3aa6bSSatish Balay   int        ierr;
1129d64ed03dSBarry Smith 
1130d64ed03dSBarry Smith   PetscFunctionBegin;
113143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1132f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
113343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1134f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
11353a40ed3dSBarry Smith   PetscFunctionReturn(0);
1136cee3aa6bSSatish Balay }
1137cee3aa6bSSatish Balay 
11385615d1e5SSatish Balay #undef __FUNC__
11395615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1140ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1141cee3aa6bSSatish Balay {
1142cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1143cee3aa6bSSatish Balay   int         ierr;
1144cee3aa6bSSatish Balay 
1145d64ed03dSBarry Smith   PetscFunctionBegin;
1146cee3aa6bSSatish Balay   /* do nondiagonal part */
1147f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1148cee3aa6bSSatish Balay   /* send it on its way */
1149537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1150cee3aa6bSSatish Balay   /* do local part */
1151f830108cSBarry Smith   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1152cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1153cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1154cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1155639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
11563a40ed3dSBarry Smith   PetscFunctionReturn(0);
1157cee3aa6bSSatish Balay }
1158cee3aa6bSSatish Balay 
11595615d1e5SSatish Balay #undef __FUNC__
11605615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1161ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1162cee3aa6bSSatish Balay {
1163cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1164cee3aa6bSSatish Balay   int         ierr;
1165cee3aa6bSSatish Balay 
1166d64ed03dSBarry Smith   PetscFunctionBegin;
1167cee3aa6bSSatish Balay   /* do nondiagonal part */
1168f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1169cee3aa6bSSatish Balay   /* send it on its way */
1170537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1171cee3aa6bSSatish Balay   /* do local part */
1172f830108cSBarry Smith   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1173cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1174cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1175cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1176537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
11773a40ed3dSBarry Smith   PetscFunctionReturn(0);
1178cee3aa6bSSatish Balay }
1179cee3aa6bSSatish Balay 
1180cee3aa6bSSatish Balay /*
1181cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1182cee3aa6bSSatish Balay    diagonal block
1183cee3aa6bSSatish Balay */
11845615d1e5SSatish Balay #undef __FUNC__
11855615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1186ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1187cee3aa6bSSatish Balay {
1188cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
11893a40ed3dSBarry Smith   int         ierr;
1190d64ed03dSBarry Smith 
1191d64ed03dSBarry Smith   PetscFunctionBegin;
1192a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
11933a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
11943a40ed3dSBarry Smith   PetscFunctionReturn(0);
1195cee3aa6bSSatish Balay }
1196cee3aa6bSSatish Balay 
11975615d1e5SSatish Balay #undef __FUNC__
11985615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1199ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1200cee3aa6bSSatish Balay {
1201cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1202cee3aa6bSSatish Balay   int         ierr;
1203d64ed03dSBarry Smith 
1204d64ed03dSBarry Smith   PetscFunctionBegin;
1205cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1206cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
12073a40ed3dSBarry Smith   PetscFunctionReturn(0);
1208cee3aa6bSSatish Balay }
1209026e39d0SSatish Balay 
12105615d1e5SSatish Balay #undef __FUNC__
12115615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1212ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
121357b952d6SSatish Balay {
121457b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1215d64ed03dSBarry Smith 
1216d64ed03dSBarry Smith   PetscFunctionBegin;
1217bd7f49f5SSatish Balay   if (m) *m = mat->M;
1218bd7f49f5SSatish Balay   if (n) *n = mat->N;
12193a40ed3dSBarry Smith   PetscFunctionReturn(0);
122057b952d6SSatish Balay }
122157b952d6SSatish Balay 
12225615d1e5SSatish Balay #undef __FUNC__
12235615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1224ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
122557b952d6SSatish Balay {
122657b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1227d64ed03dSBarry Smith 
1228d64ed03dSBarry Smith   PetscFunctionBegin;
1229f830108cSBarry Smith   *m = mat->m; *n = mat->n;
12303a40ed3dSBarry Smith   PetscFunctionReturn(0);
123157b952d6SSatish Balay }
123257b952d6SSatish Balay 
12335615d1e5SSatish Balay #undef __FUNC__
12345615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1235ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
123657b952d6SSatish Balay {
123757b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1238d64ed03dSBarry Smith 
1239d64ed03dSBarry Smith   PetscFunctionBegin;
124057b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
12413a40ed3dSBarry Smith   PetscFunctionReturn(0);
124257b952d6SSatish Balay }
124357b952d6SSatish Balay 
12445615d1e5SSatish Balay #undef __FUNC__
12455615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1246acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1247acdf5bf4SSatish Balay {
1248acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1249acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1250acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1251d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1252d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1253acdf5bf4SSatish Balay 
1254d64ed03dSBarry Smith   PetscFunctionBegin;
1255a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1256acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1257acdf5bf4SSatish Balay 
1258acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1259acdf5bf4SSatish Balay     /*
1260acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1261acdf5bf4SSatish Balay     */
1262acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1263bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1264bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1265acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1266acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1267acdf5bf4SSatish Balay     }
1268acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1269acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1270acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1271acdf5bf4SSatish Balay   }
1272acdf5bf4SSatish Balay 
1273a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1274d9d09a02SSatish Balay   lrow = row - brstart;
1275acdf5bf4SSatish Balay 
1276acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1277acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1278acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1279f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1280f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1281acdf5bf4SSatish Balay   nztot = nzA + nzB;
1282acdf5bf4SSatish Balay 
1283acdf5bf4SSatish Balay   cmap  = mat->garray;
1284acdf5bf4SSatish Balay   if (v  || idx) {
1285acdf5bf4SSatish Balay     if (nztot) {
1286acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1287acdf5bf4SSatish Balay       int imark = -1;
1288acdf5bf4SSatish Balay       if (v) {
1289acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1290acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1291d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1292acdf5bf4SSatish Balay           else break;
1293acdf5bf4SSatish Balay         }
1294acdf5bf4SSatish Balay         imark = i;
1295acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1296acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1297acdf5bf4SSatish Balay       }
1298acdf5bf4SSatish Balay       if (idx) {
1299acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1300acdf5bf4SSatish Balay         if (imark > -1) {
1301acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1302bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1303acdf5bf4SSatish Balay           }
1304acdf5bf4SSatish Balay         } else {
1305acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1306d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1307d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1308acdf5bf4SSatish Balay             else break;
1309acdf5bf4SSatish Balay           }
1310acdf5bf4SSatish Balay           imark = i;
1311acdf5bf4SSatish Balay         }
1312d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1313d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1314acdf5bf4SSatish Balay       }
1315d64ed03dSBarry Smith     } else {
1316d212a18eSSatish Balay       if (idx) *idx = 0;
1317d212a18eSSatish Balay       if (v)   *v   = 0;
1318d212a18eSSatish Balay     }
1319acdf5bf4SSatish Balay   }
1320acdf5bf4SSatish Balay   *nz = nztot;
1321f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1322f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
13233a40ed3dSBarry Smith   PetscFunctionReturn(0);
1324acdf5bf4SSatish Balay }
1325acdf5bf4SSatish Balay 
13265615d1e5SSatish Balay #undef __FUNC__
13275615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1328acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1329acdf5bf4SSatish Balay {
1330acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1331d64ed03dSBarry Smith 
1332d64ed03dSBarry Smith   PetscFunctionBegin;
1333acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1334a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1335acdf5bf4SSatish Balay   }
1336acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
13373a40ed3dSBarry Smith   PetscFunctionReturn(0);
1338acdf5bf4SSatish Balay }
1339acdf5bf4SSatish Balay 
13405615d1e5SSatish Balay #undef __FUNC__
13415615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1342ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
13435a838052SSatish Balay {
13445a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1345d64ed03dSBarry Smith 
1346d64ed03dSBarry Smith   PetscFunctionBegin;
13475a838052SSatish Balay   *bs = baij->bs;
13483a40ed3dSBarry Smith   PetscFunctionReturn(0);
13495a838052SSatish Balay }
13505a838052SSatish Balay 
13515615d1e5SSatish Balay #undef __FUNC__
13525615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1353ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
135458667388SSatish Balay {
135558667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
135658667388SSatish Balay   int         ierr;
1357d64ed03dSBarry Smith 
1358d64ed03dSBarry Smith   PetscFunctionBegin;
135958667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
136058667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
13613a40ed3dSBarry Smith   PetscFunctionReturn(0);
136258667388SSatish Balay }
13630ac07820SSatish Balay 
13645615d1e5SSatish Balay #undef __FUNC__
13655615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1366ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
13670ac07820SSatish Balay {
13684e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
13694e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
13707d57db60SLois Curfman McInnes   int         ierr;
13717d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
13720ac07820SSatish Balay 
1373d64ed03dSBarry Smith   PetscFunctionBegin;
13744e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
13754e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
13760e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1377de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
13784e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
13790e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1380de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
13810ac07820SSatish Balay   if (flag == MAT_LOCAL) {
13824e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
13834e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
13844e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
13854e220ebcSLois Curfman McInnes     info->memory       = isend[3];
13864e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
13870ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1388f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
13894e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
13904e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
13914e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
13924e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
13934e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
13940ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1395f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
13964e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
13974e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
13984e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
13994e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14004e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1401d41123aaSBarry Smith   } else {
1402d41123aaSBarry Smith     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
14030ac07820SSatish Balay   }
14045a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
14055a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
14065a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
14075a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
14084e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14094e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14104e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14113a40ed3dSBarry Smith   PetscFunctionReturn(0);
14120ac07820SSatish Balay }
14130ac07820SSatish Balay 
14145615d1e5SSatish Balay #undef __FUNC__
14155615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1416ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
141758667388SSatish Balay {
141858667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
141998305bb5SBarry Smith   int         ierr;
142058667388SSatish Balay 
1421d64ed03dSBarry Smith   PetscFunctionBegin;
142258667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
142358667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
14246da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1425c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
14264787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
14274787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
142898305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
142998305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1430b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1431aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
143298305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
143398305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1434b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
14356da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
143658667388SSatish Balay              op == MAT_SYMMETRIC ||
143758667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1438b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
143998305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
144058667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
144198305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
144258667388SSatish Balay     a->roworiented = 0;
144398305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
144498305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
14452b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
144690f02eecSBarry Smith     a->donotstash = 1;
1447d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1448d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1449133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1450133cdb44SSatish Balay     a->ht_flag = 1;
1451d64ed03dSBarry Smith   } else {
1452d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1453d64ed03dSBarry Smith   }
14543a40ed3dSBarry Smith   PetscFunctionReturn(0);
145558667388SSatish Balay }
145658667388SSatish Balay 
14575615d1e5SSatish Balay #undef __FUNC__
14585615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1459ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
14600ac07820SSatish Balay {
14610ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
14620ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
14630ac07820SSatish Balay   Mat        B;
146440011551SBarry Smith   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
14650ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
14660ac07820SSatish Balay   Scalar     *a;
14670ac07820SSatish Balay 
1468d64ed03dSBarry Smith   PetscFunctionBegin;
1469a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
147028937c7bSBarry Smith   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
14710ac07820SSatish Balay   CHKERRQ(ierr);
14720ac07820SSatish Balay 
14730ac07820SSatish Balay   /* copy over the A part */
14740ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
14750ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14760ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
14770ac07820SSatish Balay 
14780ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14790ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14800ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14810ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14820ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
14830ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14840ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14850ac07820SSatish Balay         col++; a += bs;
14860ac07820SSatish Balay       }
14870ac07820SSatish Balay     }
14880ac07820SSatish Balay   }
14890ac07820SSatish Balay   /* copy over the B part */
14900ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
14910ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
14920ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
14930ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
14940ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
14950ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
14960ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
14970ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
14980ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
14990ac07820SSatish Balay         col++; a += bs;
15000ac07820SSatish Balay       }
15010ac07820SSatish Balay     }
15020ac07820SSatish Balay   }
15030ac07820SSatish Balay   PetscFree(rvals);
15040ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15050ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
15060ac07820SSatish Balay 
15070ac07820SSatish Balay   if (matout != PETSC_NULL) {
15080ac07820SSatish Balay     *matout = B;
15090ac07820SSatish Balay   } else {
1510f830108cSBarry Smith     PetscOps *Abops;
1511cc2dc46cSBarry Smith     MatOps   Aops;
1512f830108cSBarry Smith 
15130ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
15140ac07820SSatish Balay     PetscFree(baij->rowners);
15150ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
15160ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1517b1fc9764SSatish Balay #if defined (USE_CTABLE)
1518b1fc9764SSatish Balay     if (baij->colmap) TableDelete(baij->colmap);
1519b1fc9764SSatish Balay #else
15200ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
1521b1fc9764SSatish Balay #endif
15220ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
15230ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
15240ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
15250ac07820SSatish Balay     PetscFree(baij);
1526f830108cSBarry Smith 
1527f830108cSBarry Smith     /*
1528f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1529f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1530f830108cSBarry Smith       else from C.
1531f830108cSBarry Smith     */
1532f830108cSBarry Smith     Abops = A->bops;
1533f830108cSBarry Smith     Aops  = A->ops;
1534f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1535f830108cSBarry Smith     A->bops = Abops;
1536f830108cSBarry Smith     A->ops  = Aops;
1537f830108cSBarry Smith 
15380ac07820SSatish Balay     PetscHeaderDestroy(B);
15390ac07820SSatish Balay   }
15403a40ed3dSBarry Smith   PetscFunctionReturn(0);
15410ac07820SSatish Balay }
15420e95ebc0SSatish Balay 
15435615d1e5SSatish Balay #undef __FUNC__
15445615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
15450e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
15460e95ebc0SSatish Balay {
15470e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
15480e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
15490e95ebc0SSatish Balay   int ierr,s1,s2,s3;
15500e95ebc0SSatish Balay 
1551d64ed03dSBarry Smith   PetscFunctionBegin;
15520e95ebc0SSatish Balay   if (ll)  {
15530e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
15540e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1555a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
15560e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
15570e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
15580e95ebc0SSatish Balay   }
1559a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
15603a40ed3dSBarry Smith   PetscFunctionReturn(0);
15610e95ebc0SSatish Balay }
15620e95ebc0SSatish Balay 
15635615d1e5SSatish Balay #undef __FUNC__
15645615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1565ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
15660ac07820SSatish Balay {
15670ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
15680ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1569a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
15700ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
15710ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1572a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
15730ac07820SSatish Balay   MPI_Comm       comm = A->comm;
15740ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
15750ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
15760ac07820SSatish Balay   IS             istmp;
15770ac07820SSatish Balay 
1578d64ed03dSBarry Smith   PetscFunctionBegin;
15790ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
15800ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
15810ac07820SSatish Balay 
15820ac07820SSatish Balay   /*  first count number of contributors to each processor */
15830ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
15840ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
15850ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
15860ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
15870ac07820SSatish Balay     idx = rows[i];
15880ac07820SSatish Balay     found = 0;
15890ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
15900ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
15910ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
15920ac07820SSatish Balay       }
15930ac07820SSatish Balay     }
1594a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
15950ac07820SSatish Balay   }
15960ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
15970ac07820SSatish Balay 
15980ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
15990ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1600ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
16010ac07820SSatish Balay   nrecvs = work[rank];
1602ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
16030ac07820SSatish Balay   nmax   = work[rank];
16040ac07820SSatish Balay   PetscFree(work);
16050ac07820SSatish Balay 
16060ac07820SSatish Balay   /* post receives:   */
1607d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1608d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
16090ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1610ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
16110ac07820SSatish Balay   }
16120ac07820SSatish Balay 
16130ac07820SSatish Balay   /* do sends:
16140ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
16150ac07820SSatish Balay      the ith processor
16160ac07820SSatish Balay   */
16170ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1618ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
16190ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
16200ac07820SSatish Balay   starts[0] = 0;
16210ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16220ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
16230ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
16240ac07820SSatish Balay   }
16250ac07820SSatish Balay   ISRestoreIndices(is,&rows);
16260ac07820SSatish Balay 
16270ac07820SSatish Balay   starts[0] = 0;
16280ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
16290ac07820SSatish Balay   count = 0;
16300ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
16310ac07820SSatish Balay     if (procs[i]) {
1632ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
16330ac07820SSatish Balay     }
16340ac07820SSatish Balay   }
16350ac07820SSatish Balay   PetscFree(starts);
16360ac07820SSatish Balay 
16370ac07820SSatish Balay   base = owners[rank]*bs;
16380ac07820SSatish Balay 
16390ac07820SSatish Balay   /*  wait on receives */
16400ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
16410ac07820SSatish Balay   source = lens + nrecvs;
16420ac07820SSatish Balay   count  = nrecvs; slen = 0;
16430ac07820SSatish Balay   while (count) {
1644ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
16450ac07820SSatish Balay     /* unpack receives into our local space */
1646ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
16470ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
16480ac07820SSatish Balay     lens[imdex]  = n;
16490ac07820SSatish Balay     slen += n;
16500ac07820SSatish Balay     count--;
16510ac07820SSatish Balay   }
16520ac07820SSatish Balay   PetscFree(recv_waits);
16530ac07820SSatish Balay 
16540ac07820SSatish Balay   /* move the data into the send scatter */
16550ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
16560ac07820SSatish Balay   count = 0;
16570ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
16580ac07820SSatish Balay     values = rvalues + i*nmax;
16590ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
16600ac07820SSatish Balay       lrows[count++] = values[j] - base;
16610ac07820SSatish Balay     }
16620ac07820SSatish Balay   }
16630ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
16640ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
16650ac07820SSatish Balay 
16660ac07820SSatish Balay   /* actually zap the local rows */
1667029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
16680ac07820SSatish Balay   PLogObjectParent(A,istmp);
1669a07cd24cSSatish Balay 
167072dacd9aSBarry Smith   /*
167172dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
167272dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
167372dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
167472dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
167572dacd9aSBarry Smith 
167672dacd9aSBarry Smith        Contributed by: Mathew Knepley
167772dacd9aSBarry Smith   */
16789c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
16790ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
16809c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
16819c957beeSSatish Balay     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
16829c957beeSSatish Balay   } else if (diag) {
16839c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1684fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1685fa46199cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1686fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
16876525c446SSatish Balay     }
1688a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1689a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
1690a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1691a07cd24cSSatish Balay     }
1692a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
16949c957beeSSatish Balay   } else {
16959c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1696a07cd24cSSatish Balay   }
16979c957beeSSatish Balay 
16989c957beeSSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1699a07cd24cSSatish Balay   PetscFree(lrows);
1700a07cd24cSSatish Balay 
17010ac07820SSatish Balay   /* wait on sends */
17020ac07820SSatish Balay   if (nsends) {
1703d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1704ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
17050ac07820SSatish Balay     PetscFree(send_status);
17060ac07820SSatish Balay   }
17070ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
17080ac07820SSatish Balay 
17093a40ed3dSBarry Smith   PetscFunctionReturn(0);
17100ac07820SSatish Balay }
171172dacd9aSBarry Smith 
17125615d1e5SSatish Balay #undef __FUNC__
17135615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1714ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1715ba4ca20aSSatish Balay {
1716ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
171725fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1718133cdb44SSatish Balay   static int  called = 0;
17193a40ed3dSBarry Smith   int         ierr;
1720ba4ca20aSSatish Balay 
1721d64ed03dSBarry Smith   PetscFunctionBegin;
17223a40ed3dSBarry Smith   if (!a->rank) {
17233a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
172425fdafccSSatish Balay   }
172525fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1726133cdb44SSatish Balay   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1727133cdb44SSatish Balay   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
17283a40ed3dSBarry Smith   PetscFunctionReturn(0);
1729ba4ca20aSSatish Balay }
17300ac07820SSatish Balay 
17315615d1e5SSatish Balay #undef __FUNC__
17325615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1733ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1734bb5a7306SBarry Smith {
1735bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1736bb5a7306SBarry Smith   int         ierr;
1737d64ed03dSBarry Smith 
1738d64ed03dSBarry Smith   PetscFunctionBegin;
1739bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
17403a40ed3dSBarry Smith   PetscFunctionReturn(0);
1741bb5a7306SBarry Smith }
1742bb5a7306SBarry Smith 
17432e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
17440ac07820SSatish Balay 
174579bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1746cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1747cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1748cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1749cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1750cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1751cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
1752cc2dc46cSBarry Smith   MatMultTrans_MPIBAIJ,
1753cc2dc46cSBarry Smith   MatMultTransAdd_MPIBAIJ,
1754cc2dc46cSBarry Smith   0,
1755cc2dc46cSBarry Smith   0,
1756cc2dc46cSBarry Smith   0,
1757cc2dc46cSBarry Smith   0,
1758cc2dc46cSBarry Smith   0,
1759cc2dc46cSBarry Smith   0,
1760cc2dc46cSBarry Smith   0,
1761cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1762cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
1763cc2dc46cSBarry Smith   0,
1764cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1765cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1766cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1767cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1768cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1769cc2dc46cSBarry Smith   0,
1770cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1771cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1772cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1773cc2dc46cSBarry Smith   0,
1774cc2dc46cSBarry Smith   0,
1775cc2dc46cSBarry Smith   0,
1776cc2dc46cSBarry Smith   0,
1777cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1778cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1779cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1780cc2dc46cSBarry Smith   0,
1781cc2dc46cSBarry Smith   0,
1782cc2dc46cSBarry Smith   0,
1783cc2dc46cSBarry Smith   0,
17842e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1785cc2dc46cSBarry Smith   0,
1786cc2dc46cSBarry Smith   0,
1787cc2dc46cSBarry Smith   0,
1788cc2dc46cSBarry Smith   0,
1789cc2dc46cSBarry Smith   0,
1790cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1791cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1792cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1793cc2dc46cSBarry Smith   0,
1794cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1795cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1796cc2dc46cSBarry Smith   0,
1797cc2dc46cSBarry Smith   0,
1798cc2dc46cSBarry Smith   0,
1799cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1800cc2dc46cSBarry Smith   0,
1801cc2dc46cSBarry Smith   0,
1802cc2dc46cSBarry Smith   0,
1803cc2dc46cSBarry Smith   0,
1804cc2dc46cSBarry Smith   0,
1805cc2dc46cSBarry Smith   0,
1806cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
1807cc2dc46cSBarry Smith   0,
1808cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
1809cc2dc46cSBarry Smith   0,
1810cc2dc46cSBarry Smith   0,
1811cc2dc46cSBarry Smith   0,
1812cc2dc46cSBarry Smith   MatGetMaps_Petsc};
181379bdfe76SSatish Balay 
18145ef9f2a5SBarry Smith 
1815e18c124aSSatish Balay EXTERN_C_BEGIN
18165ef9f2a5SBarry Smith #undef __FUNC__
18175ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
18185ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
18195ef9f2a5SBarry Smith {
18205ef9f2a5SBarry Smith   PetscFunctionBegin;
18215ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
18225ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
18235ef9f2a5SBarry Smith   PetscFunctionReturn(0);
18245ef9f2a5SBarry Smith }
1825e18c124aSSatish Balay EXTERN_C_END
182679bdfe76SSatish Balay 
18275615d1e5SSatish Balay #undef __FUNC__
18285615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
182979bdfe76SSatish Balay /*@C
183079bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
183179bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
183279bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
183379bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
183479bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
183579bdfe76SSatish Balay 
1836db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
1837db81eaa0SLois Curfman McInnes 
183879bdfe76SSatish Balay    Input Parameters:
1839db81eaa0SLois Curfman McInnes +  comm - MPI communicator
184079bdfe76SSatish Balay .  bs   - size of blockk
184179bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
184292e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
184392e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
184492e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
184592e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
184692e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
1847be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1848be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
184979bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
185079bdfe76SSatish Balay            submatrix  (same for all local rows)
185192e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
185292e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
1853db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
185492e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
185579bdfe76SSatish Balay            submatrix (same for all local rows).
1856db81eaa0SLois Curfman McInnes -  o_nzz - array containing the number of nonzeros in the various block rows of the
185792e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
185892e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
185979bdfe76SSatish Balay 
186079bdfe76SSatish Balay    Output Parameter:
186179bdfe76SSatish Balay .  A - the matrix
186279bdfe76SSatish Balay 
1863db81eaa0SLois Curfman McInnes    Options Database Keys:
1864db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
1865db81eaa0SLois Curfman McInnes                      block calculations (much slower)
1866db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
1867494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
1868494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
18693ffaccefSLois Curfman McInnes 
1870b259b22eSLois Curfman McInnes    Notes:
187179bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
187279bdfe76SSatish Balay    (possibly both).
187379bdfe76SSatish Balay 
1874be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1875be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
1876be79a94dSBarry Smith 
187779bdfe76SSatish Balay    Storage Information:
187879bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
187979bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
188079bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
188179bdfe76SSatish Balay    local matrix (a rectangular submatrix).
188279bdfe76SSatish Balay 
188379bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
188479bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
188579bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
188679bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
188779bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
188879bdfe76SSatish Balay 
188979bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
189079bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
189179bdfe76SSatish Balay 
1892db81eaa0SLois Curfman McInnes .vb
1893db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
1894db81eaa0SLois Curfman McInnes           -------------------
1895db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
1896db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
1897db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
1898db81eaa0SLois Curfman McInnes           -------------------
1899db81eaa0SLois Curfman McInnes .ve
190079bdfe76SSatish Balay 
190179bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
190279bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
190379bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
190457b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
190579bdfe76SSatish Balay 
1906d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1907d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
190879bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
190992e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
191092e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
19116da5968aSLois Curfman McInnes    matrices.
191279bdfe76SSatish Balay 
1913027ccd11SLois Curfman McInnes    Level: intermediate
1914027ccd11SLois Curfman McInnes 
191592e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
191679bdfe76SSatish Balay 
1917db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
191879bdfe76SSatish Balay @*/
191979bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
192079bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
192179bdfe76SSatish Balay {
192279bdfe76SSatish Balay   Mat          B;
192379bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
1924133cdb44SSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
1925a2ab621fSBarry Smith   int          flag1 = 0,flag2 = 0;
192679bdfe76SSatish Balay 
1927d64ed03dSBarry Smith   PetscFunctionBegin;
1928a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
19293914022bSBarry Smith 
19303914022bSBarry Smith   MPI_Comm_size(comm,&size);
1931494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
1932494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
1933494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
19343914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
19353914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
19363914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
19373a40ed3dSBarry Smith     PetscFunctionReturn(0);
19383914022bSBarry Smith   }
19393914022bSBarry Smith 
194079bdfe76SSatish Balay   *A = 0;
19413f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
194279bdfe76SSatish Balay   PLogObjectCreate(B);
194379bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
194479bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1945cc2dc46cSBarry Smith   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
19464c50302cSBarry Smith 
1947e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
1948e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
194990f02eecSBarry Smith   B->mapping    = 0;
195079bdfe76SSatish Balay   B->factor     = 0;
195179bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
195279bdfe76SSatish Balay 
1953e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
195479bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
195579bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
195679bdfe76SSatish Balay 
1957d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
1958a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
1959d64ed03dSBarry Smith   }
1960a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
1961a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
1962a8c6a408SBarry Smith   }
1963a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
1964a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
1965a8c6a408SBarry Smith   }
1966cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
1967cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
196879bdfe76SSatish Balay 
196979bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
197079bdfe76SSatish Balay     work[0] = m; work[1] = n;
197179bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
1972ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
197379bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
197479bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
197579bdfe76SSatish Balay   }
197679bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
197779bdfe76SSatish Balay     Mbs = M/bs;
1978a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
197979bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
198079bdfe76SSatish Balay     m   = mbs*bs;
198179bdfe76SSatish Balay   }
198279bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
198379bdfe76SSatish Balay     Nbs = N/bs;
1984a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
198579bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
198679bdfe76SSatish Balay     n   = nbs*bs;
198779bdfe76SSatish Balay   }
1988a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
1989a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
1990a8c6a408SBarry Smith   }
199179bdfe76SSatish Balay 
199279bdfe76SSatish Balay   b->m = m; B->m = m;
199379bdfe76SSatish Balay   b->n = n; B->n = n;
199479bdfe76SSatish Balay   b->N = N; B->N = N;
199579bdfe76SSatish Balay   b->M = M; B->M = M;
199679bdfe76SSatish Balay   b->bs  = bs;
199779bdfe76SSatish Balay   b->bs2 = bs*bs;
199879bdfe76SSatish Balay   b->mbs = mbs;
199979bdfe76SSatish Balay   b->nbs = nbs;
200079bdfe76SSatish Balay   b->Mbs = Mbs;
200179bdfe76SSatish Balay   b->Nbs = Nbs;
200279bdfe76SSatish Balay 
2003c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2004c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2005488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2006488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2007c7fcc2eaSBarry Smith 
200879bdfe76SSatish Balay   /* build local table of row and column ownerships */
2009*ff2fd236SBarry Smith   b->rowners = (int *) PetscMalloc(3*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
2010*ff2fd236SBarry Smith   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
20110ac07820SSatish Balay   b->cowners    = b->rowners + b->size + 2;
2012*ff2fd236SBarry Smith   b->rowners_bs = b->cowners + b->size + 2;
2013ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
201479bdfe76SSatish Balay   b->rowners[0]    = 0;
201579bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
201679bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
201779bdfe76SSatish Balay   }
2018*ff2fd236SBarry Smith   for ( i=0; i<=b->size; i++ ) {
2019*ff2fd236SBarry Smith     b->rowners_bs[i] = b->rowners[i]*bs;
2020*ff2fd236SBarry Smith   }
202179bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
202279bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
20234fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
20244fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
20254fa0d573SSatish Balay 
2026ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
202779bdfe76SSatish Balay   b->cowners[0] = 0;
202879bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
202979bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
203079bdfe76SSatish Balay   }
203179bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
203279bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
20334fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
20344fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
203579bdfe76SSatish Balay 
2036a07cd24cSSatish Balay 
203779bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2038029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
203979bdfe76SSatish Balay   PLogObjectParent(B,b->A);
204079bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2041029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
204279bdfe76SSatish Balay   PLogObjectParent(B,b->B);
204379bdfe76SSatish Balay 
204479bdfe76SSatish Balay   /* build cache for off array entries formed */
2045*ff2fd236SBarry Smith   ierr = StashCreate_Private(comm,1,&b->stash); CHKERRQ(ierr);
2046*ff2fd236SBarry Smith   ierr = StashCreate_Private(comm,bs,&b->bstash); CHKERRQ(ierr);
204790f02eecSBarry Smith   b->donotstash  = 0;
204879bdfe76SSatish Balay   b->colmap      = 0;
204979bdfe76SSatish Balay   b->garray      = 0;
205079bdfe76SSatish Balay   b->roworiented = 1;
205179bdfe76SSatish Balay 
205230793edcSSatish Balay   /* stuff used in block assembly */
205330793edcSSatish Balay   b->barray       = 0;
205430793edcSSatish Balay 
205579bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
205679bdfe76SSatish Balay   b->lvec         = 0;
205779bdfe76SSatish Balay   b->Mvctx        = 0;
205879bdfe76SSatish Balay 
205979bdfe76SSatish Balay   /* stuff for MatGetRow() */
206079bdfe76SSatish Balay   b->rowindices   = 0;
206179bdfe76SSatish Balay   b->rowvalues    = 0;
206279bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
206379bdfe76SSatish Balay 
2064a07cd24cSSatish Balay   /* hash table stuff */
2065a07cd24cSSatish Balay   b->ht           = 0;
2066187ce0cbSSatish Balay   b->hd           = 0;
20670bdbc534SSatish Balay   b->ht_size      = 0;
2068133cdb44SSatish Balay   b->ht_flag      = 0;
206925fdafccSSatish Balay   b->ht_fact      = 0;
2070187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2071187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2072a07cd24cSSatish Balay 
207379bdfe76SSatish Balay   *A = B;
2074133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2075133cdb44SSatish Balay   if (flg) {
2076133cdb44SSatish Balay     double fact = 1.39;
2077133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2078133cdb44SSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2079133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2080133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2081133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2082133cdb44SSatish Balay   }
20835ef9f2a5SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
20845ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
20855ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
20863a40ed3dSBarry Smith   PetscFunctionReturn(0);
208779bdfe76SSatish Balay }
2088026e39d0SSatish Balay 
20895615d1e5SSatish Balay #undef __FUNC__
20902e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ"
20912e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
20920ac07820SSatish Balay {
20930ac07820SSatish Balay   Mat         mat;
20940ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
20950ac07820SSatish Balay   int         ierr, len=0, flg;
20960ac07820SSatish Balay 
2097d64ed03dSBarry Smith   PetscFunctionBegin;
20980ac07820SSatish Balay   *newmat       = 0;
20993f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
21000ac07820SSatish Balay   PLogObjectCreate(mat);
21010ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2102cc2dc46cSBarry Smith   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2103e1311b90SBarry Smith   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2104e1311b90SBarry Smith   mat->ops->view       = MatView_MPIBAIJ;
21050ac07820SSatish Balay   mat->factor          = matin->factor;
21060ac07820SSatish Balay   mat->assembled       = PETSC_TRUE;
2107aef5e8e0SSatish Balay   mat->insertmode      = NOT_SET_VALUES;
21080ac07820SSatish Balay 
21090ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
21100ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
21110ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
21120ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
21130ac07820SSatish Balay 
21140ac07820SSatish Balay   a->bs  = oldmat->bs;
21150ac07820SSatish Balay   a->bs2 = oldmat->bs2;
21160ac07820SSatish Balay   a->mbs = oldmat->mbs;
21170ac07820SSatish Balay   a->nbs = oldmat->nbs;
21180ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
21190ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
21200ac07820SSatish Balay 
21210ac07820SSatish Balay   a->rstart       = oldmat->rstart;
21220ac07820SSatish Balay   a->rend         = oldmat->rend;
21230ac07820SSatish Balay   a->cstart       = oldmat->cstart;
21240ac07820SSatish Balay   a->cend         = oldmat->cend;
21250ac07820SSatish Balay   a->size         = oldmat->size;
21260ac07820SSatish Balay   a->rank         = oldmat->rank;
2127aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2128aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2129aef5e8e0SSatish Balay   a->rowindices   = 0;
21300ac07820SSatish Balay   a->rowvalues    = 0;
21310ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
213230793edcSSatish Balay   a->barray       = 0;
21333123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
21343123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
21353123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
21363123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
21370ac07820SSatish Balay 
2138133cdb44SSatish Balay   /* hash table stuff */
2139133cdb44SSatish Balay   a->ht           = 0;
2140133cdb44SSatish Balay   a->hd           = 0;
2141133cdb44SSatish Balay   a->ht_size      = 0;
2142133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
214325fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2144133cdb44SSatish Balay   a->ht_total_ct  = 0;
2145133cdb44SSatish Balay   a->ht_insert_ct = 0;
2146133cdb44SSatish Balay 
2147133cdb44SSatish Balay 
2148*ff2fd236SBarry Smith   a->rowners = (int *) PetscMalloc(3*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2149*ff2fd236SBarry Smith   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
21500ac07820SSatish Balay   a->cowners    = a->rowners + a->size + 2;
2151*ff2fd236SBarry Smith   a->rowners_bs = a->cowners + a->size + 2;
2152*ff2fd236SBarry Smith   PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
2153*ff2fd236SBarry Smith   ierr = StashCreate_Private(matin->comm,1,&a->stash); CHKERRQ(ierr);
2154*ff2fd236SBarry Smith   ierr = StashCreate_Private(matin->comm,oldmat->bs,&a->bstash); CHKERRQ(ierr);
21550ac07820SSatish Balay   if (oldmat->colmap) {
215648e59246SSatish Balay #if defined (USE_CTABLE)
2157fa46199cSSatish Balay   ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
215848e59246SSatish Balay #else
21590ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
21600ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
21610ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
216248e59246SSatish Balay #endif
21630ac07820SSatish Balay   } else a->colmap = 0;
21640ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
21650ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
21660ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
21670ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
21680ac07820SSatish Balay   } else a->garray = 0;
21690ac07820SSatish Balay 
21700ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
21710ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
21720ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2173e18c124aSSatish Balay 
21740ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
21752e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
21760ac07820SSatish Balay   PLogObjectParent(mat,a->A);
21772e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
21780ac07820SSatish Balay   PLogObjectParent(mat,a->B);
21790ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2180e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
21810ac07820SSatish Balay   if (flg) {
21820ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
21830ac07820SSatish Balay   }
21840ac07820SSatish Balay   *newmat = mat;
21853a40ed3dSBarry Smith   PetscFunctionReturn(0);
21860ac07820SSatish Balay }
218757b952d6SSatish Balay 
218857b952d6SSatish Balay #include "sys.h"
218957b952d6SSatish Balay 
21905615d1e5SSatish Balay #undef __FUNC__
21915615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
219257b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
219357b952d6SSatish Balay {
219457b952d6SSatish Balay   Mat          A;
219557b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
219657b952d6SSatish Balay   Scalar       *vals,*buf;
219757b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
219857b952d6SSatish Balay   MPI_Status   status;
2199cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
220057b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
220140011551SBarry Smith   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
220257b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
220357b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
220457b952d6SSatish Balay 
2205d64ed03dSBarry Smith   PetscFunctionBegin;
220657b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
220757b952d6SSatish Balay 
220857b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
220957b952d6SSatish Balay   if (!rank) {
221057b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2211e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2212a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2213d64ed03dSBarry Smith     if (header[3] < 0) {
2214a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2215d64ed03dSBarry Smith     }
22166c5fab8fSBarry Smith   }
2217d64ed03dSBarry Smith 
2218ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
221957b952d6SSatish Balay   M = header[1]; N = header[2];
222057b952d6SSatish Balay 
2221a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
222257b952d6SSatish Balay 
222357b952d6SSatish Balay   /*
222457b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
222557b952d6SSatish Balay      divisible by the blocksize
222657b952d6SSatish Balay   */
222757b952d6SSatish Balay   Mbs        = M/bs;
222857b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
222957b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
223057b952d6SSatish Balay   else                  Mbs++;
223157b952d6SSatish Balay   if (extra_rows &&!rank) {
2232b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
223357b952d6SSatish Balay   }
2234537820f0SBarry Smith 
223557b952d6SSatish Balay   /* determine ownership of all rows */
223657b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
223757b952d6SSatish Balay   m   = mbs * bs;
2238cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2239cee3aa6bSSatish Balay   browners = rowners + size + 1;
2240ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
224157b952d6SSatish Balay   rowners[0] = 0;
2242cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2243cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
224457b952d6SSatish Balay   rstart = rowners[rank];
224557b952d6SSatish Balay   rend   = rowners[rank+1];
224657b952d6SSatish Balay 
224757b952d6SSatish Balay   /* distribute row lengths to all processors */
224857b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
224957b952d6SSatish Balay   if (!rank) {
225057b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2251e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
225257b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
225357b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2254cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2255ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
225657b952d6SSatish Balay     PetscFree(sndcounts);
2257d64ed03dSBarry Smith   } else {
2258ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
225957b952d6SSatish Balay   }
226057b952d6SSatish Balay 
226157b952d6SSatish Balay   if (!rank) {
226257b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
226357b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
226457b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
226557b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
226657b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
226757b952d6SSatish Balay         procsnz[i] += rowlengths[j];
226857b952d6SSatish Balay       }
226957b952d6SSatish Balay     }
227057b952d6SSatish Balay     PetscFree(rowlengths);
227157b952d6SSatish Balay 
227257b952d6SSatish Balay     /* determine max buffer needed and allocate it */
227357b952d6SSatish Balay     maxnz = 0;
227457b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
227557b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
227657b952d6SSatish Balay     }
227757b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
227857b952d6SSatish Balay 
227957b952d6SSatish Balay     /* read in my part of the matrix column indices  */
228057b952d6SSatish Balay     nz = procsnz[0];
228157b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
228257b952d6SSatish Balay     mycols = ibuf;
2283cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2284e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2285cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2286cee3aa6bSSatish Balay 
228757b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
228857b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
228957b952d6SSatish Balay       nz   = procsnz[i];
2290e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2291ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
229257b952d6SSatish Balay     }
229357b952d6SSatish Balay     /* read in the stuff for the last proc */
229457b952d6SSatish Balay     if ( size != 1 ) {
229557b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2296e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
229757b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2298ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
229957b952d6SSatish Balay     }
230057b952d6SSatish Balay     PetscFree(cols);
2301d64ed03dSBarry Smith   } else {
230257b952d6SSatish Balay     /* determine buffer space needed for message */
230357b952d6SSatish Balay     nz = 0;
230457b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
230557b952d6SSatish Balay       nz += locrowlens[i];
230657b952d6SSatish Balay     }
230757b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
230857b952d6SSatish Balay     mycols = ibuf;
230957b952d6SSatish Balay     /* receive message of column indices*/
2310ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2311ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2312a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
231357b952d6SSatish Balay   }
231457b952d6SSatish Balay 
231557b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2316cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2317cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
231857b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2319cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
232057b952d6SSatish Balay   masked1 = mask    + Mbs;
232157b952d6SSatish Balay   masked2 = masked1 + Mbs;
232257b952d6SSatish Balay   rowcount = 0; nzcount = 0;
232357b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
232457b952d6SSatish Balay     dcount  = 0;
232557b952d6SSatish Balay     odcount = 0;
232657b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
232757b952d6SSatish Balay       kmax = locrowlens[rowcount];
232857b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
232957b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
233057b952d6SSatish Balay         if (!mask[tmp]) {
233157b952d6SSatish Balay           mask[tmp] = 1;
233257b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
233357b952d6SSatish Balay           else masked1[dcount++] = tmp;
233457b952d6SSatish Balay         }
233557b952d6SSatish Balay       }
233657b952d6SSatish Balay       rowcount++;
233757b952d6SSatish Balay     }
2338cee3aa6bSSatish Balay 
233957b952d6SSatish Balay     dlens[i]  = dcount;
234057b952d6SSatish Balay     odlens[i] = odcount;
2341cee3aa6bSSatish Balay 
234257b952d6SSatish Balay     /* zero out the mask elements we set */
234357b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
234457b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
234557b952d6SSatish Balay   }
2346cee3aa6bSSatish Balay 
234757b952d6SSatish Balay   /* create our matrix */
2348537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2349537820f0SBarry Smith          CHKERRQ(ierr);
235057b952d6SSatish Balay   A = *newmat;
23516d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
235257b952d6SSatish Balay 
235357b952d6SSatish Balay   if (!rank) {
235457b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
235557b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
235657b952d6SSatish Balay     nz = procsnz[0];
235757b952d6SSatish Balay     vals = buf;
2358cee3aa6bSSatish Balay     mycols = ibuf;
2359cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2360e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2361cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2362537820f0SBarry Smith 
236357b952d6SSatish Balay     /* insert into matrix */
236457b952d6SSatish Balay     jj      = rstart*bs;
236557b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
236657b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
236757b952d6SSatish Balay       mycols += locrowlens[i];
236857b952d6SSatish Balay       vals   += locrowlens[i];
236957b952d6SSatish Balay       jj++;
237057b952d6SSatish Balay     }
237157b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
237257b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
237357b952d6SSatish Balay       nz   = procsnz[i];
237457b952d6SSatish Balay       vals = buf;
2375e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2376ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
237757b952d6SSatish Balay     }
237857b952d6SSatish Balay     /* the last proc */
237957b952d6SSatish Balay     if ( size != 1 ){
238057b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2381cee3aa6bSSatish Balay       vals = buf;
2382e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
238357b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2384ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
238557b952d6SSatish Balay     }
238657b952d6SSatish Balay     PetscFree(procsnz);
2387d64ed03dSBarry Smith   } else {
238857b952d6SSatish Balay     /* receive numeric values */
238957b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
239057b952d6SSatish Balay 
239157b952d6SSatish Balay     /* receive message of values*/
239257b952d6SSatish Balay     vals   = buf;
2393cee3aa6bSSatish Balay     mycols = ibuf;
2394ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2395ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2396a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
239757b952d6SSatish Balay 
239857b952d6SSatish Balay     /* insert into matrix */
239957b952d6SSatish Balay     jj      = rstart*bs;
2400cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
240157b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
240257b952d6SSatish Balay       mycols += locrowlens[i];
240357b952d6SSatish Balay       vals   += locrowlens[i];
240457b952d6SSatish Balay       jj++;
240557b952d6SSatish Balay     }
240657b952d6SSatish Balay   }
240757b952d6SSatish Balay   PetscFree(locrowlens);
240857b952d6SSatish Balay   PetscFree(buf);
240957b952d6SSatish Balay   PetscFree(ibuf);
241057b952d6SSatish Balay   PetscFree(rowners);
241157b952d6SSatish Balay   PetscFree(dlens);
2412cee3aa6bSSatish Balay   PetscFree(mask);
24136d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
24146d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
24153a40ed3dSBarry Smith   PetscFunctionReturn(0);
241657b952d6SSatish Balay }
241757b952d6SSatish Balay 
241857b952d6SSatish Balay 
2419133cdb44SSatish Balay 
2420133cdb44SSatish Balay #undef __FUNC__
2421133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2422133cdb44SSatish Balay /*@
2423133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2424133cdb44SSatish Balay 
2425133cdb44SSatish Balay    Input Parameters:
2426133cdb44SSatish Balay .  mat  - the matrix
2427133cdb44SSatish Balay .  fact - factor
2428133cdb44SSatish Balay 
2429fee21e36SBarry Smith    Collective on Mat
2430fee21e36SBarry Smith 
2431133cdb44SSatish Balay   Notes:
2432133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2433133cdb44SSatish Balay 
2434133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2435133cdb44SSatish Balay 
2436133cdb44SSatish Balay .seealso: MatSetOption()
2437133cdb44SSatish Balay @*/
2438133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2439133cdb44SSatish Balay {
244025fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2441133cdb44SSatish Balay 
2442133cdb44SSatish Balay   PetscFunctionBegin;
2443133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
244425fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2445133cdb44SSatish Balay       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2446133cdb44SSatish Balay   }
2447133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*) mat->data;
2448133cdb44SSatish Balay   baij->ht_fact = fact;
2449133cdb44SSatish Balay   PetscFunctionReturn(0);
2450133cdb44SSatish Balay }
2451