xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision bbb85fb35fa65146d77861245dba6ac34c084efa)
1a5eb4965SSatish Balay #ifdef PETSC_RCS_HEADER
2*bbb85fb3SSatish Balay static char vcid[] = "$Id: mpibaij.c,v 1.155 1999/02/26 17:08:24 balay Exp balay $";
379bdfe76SSatish Balay #endif
479bdfe76SSatish Balay 
577ed5343SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
6c16cb8f2SBarry Smith #include "src/vec/vecimpl.h"
779bdfe76SSatish Balay 
857b952d6SSatish Balay extern int MatSetUpMultiply_MPIBAIJ(Mat);
957b952d6SSatish Balay extern int DisAssemble_MPIBAIJ(Mat);
10d212a18eSSatish Balay extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
117b2a1423SBarry Smith extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12946de2abSSatish Balay extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
13*bbb85fb3SSatish Balay extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14*bbb85fb3SSatish Balay extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15*bbb85fb3SSatish Balay extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
16*bbb85fb3SSatish Balay extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
17*bbb85fb3SSatish Balay extern int MatPrintHelp_SeqBAIJ(Mat);
183b2fbd54SBarry Smith 
19537820f0SBarry Smith /*
20537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
2157b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
2257b952d6SSatish Balay    storage of the matrix.  This is done in a non scable way since the
2357b952d6SSatish Balay    length of colmap equals the global matrix length.
2457b952d6SSatish Balay */
255615d1e5SSatish Balay #undef __FUNC__
265615d1e5SSatish Balay #define __FUNC__ "CreateColmap_MPIBAIJ_Private"
2757b952d6SSatish Balay static int CreateColmap_MPIBAIJ_Private(Mat mat)
2857b952d6SSatish Balay {
2957b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
3057b952d6SSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*) baij->B->data;
316dee3f7eSBarry Smith   int         nbs = B->nbs,i,bs=B->bs;
326dee3f7eSBarry Smith #if defined (USE_CTABLE)
336dee3f7eSBarry Smith   int         ierr;
346dee3f7eSBarry Smith #endif
3557b952d6SSatish Balay 
36d64ed03dSBarry Smith   PetscFunctionBegin;
3748e59246SSatish Balay #if defined (USE_CTABLE)
38fa46199cSSatish Balay   ierr = TableCreate(baij->nbs/5,&baij->colmap); CHKERRQ(ierr);
3948e59246SSatish Balay   for ( i=0; i<nbs; i++ ){
4048e59246SSatish Balay     ierr = TableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
4148e59246SSatish Balay   }
4248e59246SSatish Balay #else
43758f045eSSatish Balay   baij->colmap = (int *) PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
4457b952d6SSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
4557b952d6SSatish Balay   PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
46928fc39bSSatish Balay   for ( i=0; i<nbs; i++ ) baij->colmap[baij->garray[i]] = i*bs+1;
4748e59246SSatish Balay #endif
483a40ed3dSBarry Smith   PetscFunctionReturn(0);
4957b952d6SSatish Balay }
5057b952d6SSatish Balay 
5180c1aa95SSatish Balay #define CHUNKSIZE  10
5280c1aa95SSatish Balay 
53f5e9677aSSatish Balay #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
5480c1aa95SSatish Balay { \
5580c1aa95SSatish Balay  \
5680c1aa95SSatish Balay     brow = row/bs;  \
5780c1aa95SSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
58ac7a638eSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
5980c1aa95SSatish Balay       bcol = col/bs; \
6080c1aa95SSatish Balay       ridx = row % bs; cidx = col % bs; \
61ab26458aSBarry Smith       low = 0; high = nrow; \
62ab26458aSBarry Smith       while (high-low > 3) { \
63ab26458aSBarry Smith         t = (low+high)/2; \
64ab26458aSBarry Smith         if (rp[t] > bcol) high = t; \
65ab26458aSBarry Smith         else              low  = t; \
66ab26458aSBarry Smith       } \
67ab26458aSBarry Smith       for ( _i=low; _i<high; _i++ ) { \
6880c1aa95SSatish Balay         if (rp[_i] > bcol) break; \
6980c1aa95SSatish Balay         if (rp[_i] == bcol) { \
7080c1aa95SSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
71eada6651SSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
72eada6651SSatish Balay           else                    *bap  = value;  \
73ac7a638eSSatish Balay           goto a_noinsert; \
7480c1aa95SSatish Balay         } \
7580c1aa95SSatish Balay       } \
7689280ab3SLois Curfman McInnes       if (a->nonew == 1) goto a_noinsert; \
77a8c6a408SBarry Smith       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
7880c1aa95SSatish Balay       if (nrow >= rmax) { \
7980c1aa95SSatish Balay         /* there is no extra room in row, therefore enlarge */ \
8080c1aa95SSatish Balay         int    new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
8180c1aa95SSatish Balay         Scalar *new_a; \
8280c1aa95SSatish Balay  \
83a8c6a408SBarry Smith         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
8489280ab3SLois Curfman McInnes  \
8580c1aa95SSatish Balay         /* malloc new storage space */ \
8680c1aa95SSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); \
8780c1aa95SSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
8880c1aa95SSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
8980c1aa95SSatish Balay         new_i   = new_j + new_nz; \
9080c1aa95SSatish Balay  \
9180c1aa95SSatish Balay         /* copy over old data into new slots */ \
9280c1aa95SSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} \
9380c1aa95SSatish Balay         for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
9480c1aa95SSatish Balay         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); \
9580c1aa95SSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
9680c1aa95SSatish Balay         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, \
9780c1aa95SSatish Balay                                                            len*sizeof(int)); \
9880c1aa95SSatish Balay         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); \
9980c1aa95SSatish Balay         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
10080c1aa95SSatish Balay         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
10180c1aa95SSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar));  \
10280c1aa95SSatish Balay         /* free up old matrix storage */ \
10380c1aa95SSatish Balay         PetscFree(a->a);  \
10480c1aa95SSatish Balay         if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
10580c1aa95SSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
10680c1aa95SSatish Balay         a->singlemalloc = 1; \
10780c1aa95SSatish Balay  \
10880c1aa95SSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
109ac7a638eSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
11080c1aa95SSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
11180c1aa95SSatish Balay         a->maxnz += bs2*CHUNKSIZE; \
11280c1aa95SSatish Balay         a->reallocs++; \
11380c1aa95SSatish Balay         a->nz++; \
11480c1aa95SSatish Balay       } \
11580c1aa95SSatish Balay       N = nrow++ - 1;  \
11680c1aa95SSatish Balay       /* shift up all the later entries in this row */ \
11780c1aa95SSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
11880c1aa95SSatish Balay         rp[ii+1] = rp[ii]; \
11980c1aa95SSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
12080c1aa95SSatish Balay       } \
12180c1aa95SSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
12280c1aa95SSatish Balay       rp[_i]                      = bcol;  \
12380c1aa95SSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
124ac7a638eSSatish Balay       a_noinsert:; \
12580c1aa95SSatish Balay     ailen[brow] = nrow; \
12680c1aa95SSatish Balay }
12757b952d6SSatish Balay 
128ac7a638eSSatish Balay #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
129ac7a638eSSatish Balay { \
130ac7a638eSSatish Balay  \
131ac7a638eSSatish Balay     brow = row/bs;  \
132ac7a638eSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
133ac7a638eSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
134ac7a638eSSatish Balay       bcol = col/bs; \
135ac7a638eSSatish Balay       ridx = row % bs; cidx = col % bs; \
136ac7a638eSSatish Balay       low = 0; high = nrow; \
137ac7a638eSSatish Balay       while (high-low > 3) { \
138ac7a638eSSatish Balay         t = (low+high)/2; \
139ac7a638eSSatish Balay         if (rp[t] > bcol) high = t; \
140ac7a638eSSatish Balay         else              low  = t; \
141ac7a638eSSatish Balay       } \
142ac7a638eSSatish Balay       for ( _i=low; _i<high; _i++ ) { \
143ac7a638eSSatish Balay         if (rp[_i] > bcol) break; \
144ac7a638eSSatish Balay         if (rp[_i] == bcol) { \
145ac7a638eSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
146ac7a638eSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
147ac7a638eSSatish Balay           else                    *bap  = value;  \
148ac7a638eSSatish Balay           goto b_noinsert; \
149ac7a638eSSatish Balay         } \
150ac7a638eSSatish Balay       } \
15189280ab3SLois Curfman McInnes       if (b->nonew == 1) goto b_noinsert; \
152a8c6a408SBarry Smith       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
153ac7a638eSSatish Balay       if (nrow >= rmax) { \
154ac7a638eSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
15574c639caSSatish Balay         int    new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
156ac7a638eSSatish Balay         Scalar *new_a; \
157ac7a638eSSatish Balay  \
158a8c6a408SBarry Smith         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
15989280ab3SLois Curfman McInnes  \
160ac7a638eSSatish Balay         /* malloc new storage space */ \
16174c639caSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(b->mbs+1)*sizeof(int); \
162ac7a638eSSatish Balay         new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
163ac7a638eSSatish Balay         new_j   = (int *) (new_a + bs2*new_nz); \
164ac7a638eSSatish Balay         new_i   = new_j + new_nz; \
165ac7a638eSSatish Balay  \
166ac7a638eSSatish Balay         /* copy over old data into new slots */ \
167ac7a638eSSatish Balay         for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = bi[ii];} \
16874c639caSSatish Balay         for ( ii=brow+1; ii<b->mbs+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
169ac7a638eSSatish Balay         PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int)); \
170ac7a638eSSatish Balay         len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
171ac7a638eSSatish Balay         PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow, \
172ac7a638eSSatish Balay                                                            len*sizeof(int)); \
173ac7a638eSSatish Balay         PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(Scalar)); \
174ac7a638eSSatish Balay         PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); \
175ac7a638eSSatish Balay         PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
176ac7a638eSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(Scalar));  \
177ac7a638eSSatish Balay         /* free up old matrix storage */ \
17874c639caSSatish Balay         PetscFree(b->a);  \
17974c639caSSatish Balay         if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
18074c639caSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
18174c639caSSatish Balay         b->singlemalloc = 1; \
182ac7a638eSSatish Balay  \
183ac7a638eSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
184ac7a638eSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
18574c639caSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); \
18674c639caSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
18774c639caSSatish Balay         b->reallocs++; \
18874c639caSSatish Balay         b->nz++; \
189ac7a638eSSatish Balay       } \
190ac7a638eSSatish Balay       N = nrow++ - 1;  \
191ac7a638eSSatish Balay       /* shift up all the later entries in this row */ \
192ac7a638eSSatish Balay       for ( ii=N; ii>=_i; ii-- ) { \
193ac7a638eSSatish Balay         rp[ii+1] = rp[ii]; \
194ac7a638eSSatish Balay         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); \
195ac7a638eSSatish Balay       } \
196ac7a638eSSatish Balay       if (N>=_i) PetscMemzero(ap+bs2*_i,bs2*sizeof(Scalar));  \
197ac7a638eSSatish Balay       rp[_i]                      = bcol;  \
198ac7a638eSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
199ac7a638eSSatish Balay       b_noinsert:; \
200ac7a638eSSatish Balay     bilen[brow] = nrow; \
201ac7a638eSSatish Balay }
202ac7a638eSSatish Balay 
2035615d1e5SSatish Balay #undef __FUNC__
2045615d1e5SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ"
205ec1ea8d8SLois Curfman McInnes int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
20657b952d6SSatish Balay {
20757b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
20857b952d6SSatish Balay   Scalar      value;
2094fa0d573SSatish Balay   int         ierr,i,j,row,col;
2104fa0d573SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
2114fa0d573SSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
2124fa0d573SSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
21357b952d6SSatish Balay 
214eada6651SSatish Balay   /* Some Variables required in the macro */
21580c1aa95SSatish Balay   Mat         A = baij->A;
21680c1aa95SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) (A)->data;
217ac7a638eSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
218ac7a638eSSatish Balay   Scalar      *aa=a->a;
219ac7a638eSSatish Balay 
220ac7a638eSSatish Balay   Mat         B = baij->B;
221ac7a638eSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ *) (B)->data;
222ac7a638eSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
223ac7a638eSSatish Balay   Scalar      *ba=b->a;
224ac7a638eSSatish Balay 
225ac7a638eSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
226ab26458aSBarry Smith   int         low,high,t,ridx,cidx,bs2=a->bs2;
227ac7a638eSSatish Balay   Scalar      *ap,*bap;
22880c1aa95SSatish Balay 
229d64ed03dSBarry Smith   PetscFunctionBegin;
23057b952d6SSatish Balay   for ( i=0; i<m; i++ ) {
2315ef9f2a5SBarry Smith     if (im[i] < 0) continue;
2323a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
233a8c6a408SBarry Smith     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
234639f9d9dSBarry Smith #endif
23557b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
23657b952d6SSatish Balay       row = im[i] - rstart_orig;
23757b952d6SSatish Balay       for ( j=0; j<n; j++ ) {
23857b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){
23957b952d6SSatish Balay           col = in[j] - cstart_orig;
24057b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
241f5e9677aSSatish Balay           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
24280c1aa95SSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
24357b952d6SSatish Balay         }
2445ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
2453a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
246a8c6a408SBarry Smith         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
247639f9d9dSBarry Smith #endif
24857b952d6SSatish Balay         else {
24957b952d6SSatish Balay           if (mat->was_assembled) {
250905e6a2fSBarry Smith             if (!baij->colmap) {
251905e6a2fSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
252905e6a2fSBarry Smith             }
25348e59246SSatish Balay #if defined (USE_CTABLE)
254fa46199cSSatish Balay             ierr = TableFind(baij->colmap, in[j]/bs + 1,&col); CHKERRQ(ierr);
255fa46199cSSatish Balay             col  = col - 1 + in[j]%bs;
25648e59246SSatish Balay #else
257905e6a2fSBarry Smith             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
25848e59246SSatish Balay #endif
25957b952d6SSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
26057b952d6SSatish Balay               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
26157b952d6SSatish Balay               col =  in[j];
2629bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2639bf004c3SSatish Balay               B = baij->B;
2649bf004c3SSatish Balay               b = (Mat_SeqBAIJ *) (B)->data;
2659bf004c3SSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
2669bf004c3SSatish Balay               ba=b->a;
26757b952d6SSatish Balay             }
268d64ed03dSBarry Smith           } else col = in[j];
26957b952d6SSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
270ac7a638eSSatish Balay           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
271ac7a638eSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
27257b952d6SSatish Balay         }
27357b952d6SSatish Balay       }
274d64ed03dSBarry Smith     } else {
27590f02eecSBarry Smith       if (roworiented && !baij->donotstash) {
27657b952d6SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
277d64ed03dSBarry Smith       } else {
27890f02eecSBarry Smith         if (!baij->donotstash) {
27957b952d6SSatish Balay           row = im[i];
28057b952d6SSatish Balay 	  for ( j=0; j<n; j++ ) {
28157b952d6SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
28257b952d6SSatish Balay           }
28357b952d6SSatish Balay         }
28457b952d6SSatish Balay       }
28557b952d6SSatish Balay     }
28690f02eecSBarry Smith   }
2873a40ed3dSBarry Smith   PetscFunctionReturn(0);
28857b952d6SSatish Balay }
28957b952d6SSatish Balay 
290ab26458aSBarry Smith #undef __FUNC__
291ab26458aSBarry Smith #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ"
292ec1ea8d8SLois Curfman McInnes int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
293ab26458aSBarry Smith {
294ab26458aSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
29530793edcSSatish Balay   Scalar      *value,*barray=baij->barray;
296abef11f7SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
297ab26458aSBarry Smith   int         roworiented = baij->roworiented,rstart=baij->rstart ;
298ab26458aSBarry Smith   int         rend=baij->rend,cstart=baij->cstart,stepval;
299ab26458aSBarry Smith   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
300ab26458aSBarry Smith 
30130793edcSSatish Balay   if(!barray) {
30247513183SBarry Smith     baij->barray = barray = (Scalar*) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(barray);
30330793edcSSatish Balay   }
30430793edcSSatish Balay 
305ab26458aSBarry Smith   if (roworiented) {
306ab26458aSBarry Smith     stepval = (n-1)*bs;
307ab26458aSBarry Smith   } else {
308ab26458aSBarry Smith     stepval = (m-1)*bs;
309ab26458aSBarry Smith   }
310ab26458aSBarry Smith   for ( i=0; i<m; i++ ) {
3115ef9f2a5SBarry Smith     if (im[i] < 0) continue;
3123a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
3135ef9f2a5SBarry Smith     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
314ab26458aSBarry Smith #endif
315ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
316ab26458aSBarry Smith       row = im[i] - rstart;
317ab26458aSBarry Smith       for ( j=0; j<n; j++ ) {
31815b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
31915b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
32015b57d14SSatish Balay           barray = v + i*bs2;
32115b57d14SSatish Balay         } else if((!roworiented) && (m == 1)) {
32215b57d14SSatish Balay           barray = v + j*bs2;
32315b57d14SSatish Balay         } else { /* Here a copy is required */
324ab26458aSBarry Smith           if (roworiented) {
325ab26458aSBarry Smith             value = v + i*(stepval+bs)*bs + j*bs;
326ab26458aSBarry Smith           } else {
327ab26458aSBarry Smith             value = v + j*(stepval+bs)*bs + i*bs;
328abef11f7SSatish Balay           }
32947513183SBarry Smith           for ( ii=0; ii<bs; ii++,value+=stepval ) {
33047513183SBarry Smith             for (jj=0; jj<bs; jj++ ) {
33130793edcSSatish Balay               *barray++  = *value++;
33247513183SBarry Smith             }
33347513183SBarry Smith           }
33430793edcSSatish Balay           barray -=bs2;
33515b57d14SSatish Balay         }
336abef11f7SSatish Balay 
337abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend){
338abef11f7SSatish Balay           col  = in[j] - cstart;
33930793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
340ab26458aSBarry Smith         }
3415ef9f2a5SBarry Smith         else if (in[j] < 0) continue;
3423a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
3435ef9f2a5SBarry Smith         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
344ab26458aSBarry Smith #endif
345ab26458aSBarry Smith         else {
346ab26458aSBarry Smith           if (mat->was_assembled) {
347ab26458aSBarry Smith             if (!baij->colmap) {
348ab26458aSBarry Smith               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
349ab26458aSBarry Smith             }
350a5eb4965SSatish Balay 
3513a40ed3dSBarry Smith #if defined(USE_PETSC_BOPT_g)
35248e59246SSatish Balay #if defined (USE_CTABLE)
353fa46199cSSatish Balay             { int data;
354fa46199cSSatish Balay             ierr = TableFind(baij->colmap,in[j]+1,&data); CHKERRQ(ierr);
355fa46199cSSatish Balay             if((data - 1) % bs)
35648e59246SSatish Balay 	      {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
357fa46199cSSatish Balay             }
35848e59246SSatish Balay #else
359a8c6a408SBarry Smith             if ((baij->colmap[in[j]] - 1) % bs) {SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");}
360a5eb4965SSatish Balay #endif
36148e59246SSatish Balay #endif
36248e59246SSatish Balay #if defined (USE_CTABLE)
363fa46199cSSatish Balay 	    ierr = TableFind(baij->colmap,in[j]+1,&col); CHKERRQ(ierr);
364fa46199cSSatish Balay             col  = (col - 1)/bs;
36548e59246SSatish Balay #else
366a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
36748e59246SSatish Balay #endif
368ab26458aSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
369ab26458aSBarry Smith               ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
370ab26458aSBarry Smith               col =  in[j];
371ab26458aSBarry Smith             }
372ab26458aSBarry Smith           }
373ab26458aSBarry Smith           else col = in[j];
37430793edcSSatish Balay           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
375ab26458aSBarry Smith         }
376ab26458aSBarry Smith       }
377d64ed03dSBarry Smith     } else {
378ab26458aSBarry Smith       if (!baij->donotstash) {
379ab26458aSBarry Smith         if (roworiented ) {
380abef11f7SSatish Balay           row   = im[i]*bs;
381abef11f7SSatish Balay           value = v + i*(stepval+bs)*bs;
382abef11f7SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
383abef11f7SSatish Balay             for ( k=0; k<n; k++ ) {
384abef11f7SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
385abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
386abef11f7SSatish Balay               }
387ab26458aSBarry Smith             }
388ab26458aSBarry Smith           }
389d64ed03dSBarry Smith         } else {
390ab26458aSBarry Smith           for ( j=0; j<n; j++ ) {
391abef11f7SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
392abef11f7SSatish Balay             col   = in[j]*bs;
393abef11f7SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
394abef11f7SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
395abef11f7SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
396ab26458aSBarry Smith               }
397ab26458aSBarry Smith             }
398ab26458aSBarry Smith           }
399abef11f7SSatish Balay         }
400abef11f7SSatish Balay       }
401ab26458aSBarry Smith     }
402ab26458aSBarry Smith   }
4033a40ed3dSBarry Smith   PetscFunctionReturn(0);
404ab26458aSBarry Smith }
4050bdbc534SSatish Balay #define HASH_KEY 0.6180339887
406c2760754SSatish Balay /* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
407c2760754SSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
408c2760754SSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
4095615d1e5SSatish Balay #undef __FUNC__
4100bdbc534SSatish Balay #define __FUNC__ "MatSetValues_MPIBAIJ_HT"
4110bdbc534SSatish Balay int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4120bdbc534SSatish Balay {
4130bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4140bdbc534SSatish Balay   int         ierr,i,j,row,col;
4150bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
416c2760754SSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
417c2760754SSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
418c2760754SSatish Balay   double      tmp;
419b9e4cc15SSatish Balay   Scalar      ** HD = baij->hd,value;
4204a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
4214a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
4224a15367fSSatish Balay #endif
4230bdbc534SSatish Balay 
4240bdbc534SSatish Balay   PetscFunctionBegin;
4250bdbc534SSatish Balay 
4260bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
4270bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
4280bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
4290bdbc534SSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
4300bdbc534SSatish Balay #endif
4310bdbc534SSatish Balay       row = im[i];
432c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4330bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
4340bdbc534SSatish Balay         col = in[j];
4350bdbc534SSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4360bdbc534SSatish Balay         /* Look up into the Hash Table */
437c2760754SSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
438c2760754SSatish Balay         h1  = HASH(size,key,tmp);
4390bdbc534SSatish Balay 
440c2760754SSatish Balay 
441c2760754SSatish Balay         idx = h1;
442187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
443187ce0cbSSatish Balay         insert_ct++;
444187ce0cbSSatish Balay         total_ct++;
445187ce0cbSSatish Balay         if (HT[idx] != key) {
446187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
447187ce0cbSSatish Balay           if (idx == size) {
448187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
449187ce0cbSSatish Balay             if (idx == h1) {
450187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
451187ce0cbSSatish Balay             }
452187ce0cbSSatish Balay           }
453187ce0cbSSatish Balay         }
454187ce0cbSSatish Balay #else
455c2760754SSatish Balay         if (HT[idx] != key) {
456c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
457c2760754SSatish Balay           if (idx == size) {
458c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
459c2760754SSatish Balay             if (idx == h1) {
460c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
461c2760754SSatish Balay             }
462c2760754SSatish Balay           }
463c2760754SSatish Balay         }
464187ce0cbSSatish Balay #endif
465c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
466c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
467c2760754SSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
4680bdbc534SSatish Balay       }
4690bdbc534SSatish Balay     } else {
4700bdbc534SSatish Balay       if (roworiented && !baij->donotstash) {
4710bdbc534SSatish Balay         ierr = StashValues_Private(&baij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
4720bdbc534SSatish Balay       } else {
4730bdbc534SSatish Balay         if (!baij->donotstash) {
4740bdbc534SSatish Balay           row = im[i];
4750bdbc534SSatish Balay 	  for ( j=0; j<n; j++ ) {
4760bdbc534SSatish Balay 	    ierr = StashValues_Private(&baij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
4770bdbc534SSatish Balay           }
4780bdbc534SSatish Balay         }
4790bdbc534SSatish Balay       }
4800bdbc534SSatish Balay     }
4810bdbc534SSatish Balay   }
482187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
483187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
484187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
485187ce0cbSSatish Balay #endif
4860bdbc534SSatish Balay   PetscFunctionReturn(0);
4870bdbc534SSatish Balay }
4880bdbc534SSatish Balay 
4890bdbc534SSatish Balay #undef __FUNC__
4900bdbc534SSatish Balay #define __FUNC__ "MatSetValuesBlocked_MPIBAIJ_HT"
4910bdbc534SSatish Balay int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
4920bdbc534SSatish Balay {
4930bdbc534SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
4940bdbc534SSatish Balay   int         ierr,i,j,ii,jj,row,col,k,l;
4950bdbc534SSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
496b4cc0f5aSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
497c2760754SSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
498c2760754SSatish Balay   double      tmp;
499187ce0cbSSatish Balay   Scalar      ** HD = baij->hd,*value,*v_t,*baij_a;
5004a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
5014a15367fSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
5024a15367fSSatish Balay #endif
5030bdbc534SSatish Balay 
504d0a41580SSatish Balay   PetscFunctionBegin;
505d0a41580SSatish Balay 
5060bdbc534SSatish Balay   if (roworiented) {
5070bdbc534SSatish Balay     stepval = (n-1)*bs;
5080bdbc534SSatish Balay   } else {
5090bdbc534SSatish Balay     stepval = (m-1)*bs;
5100bdbc534SSatish Balay   }
5110bdbc534SSatish Balay   for ( i=0; i<m; i++ ) {
5120bdbc534SSatish Balay #if defined(USE_PETSC_BOPT_g)
5130bdbc534SSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
5140bdbc534SSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
5150bdbc534SSatish Balay #endif
5160bdbc534SSatish Balay     row   = im[i];
517187ce0cbSSatish Balay     v_t   = v + i*bs2;
518c2760754SSatish Balay     if (row >= rstart && row < rend) {
5190bdbc534SSatish Balay       for ( j=0; j<n; j++ ) {
5200bdbc534SSatish Balay         col = in[j];
5210bdbc534SSatish Balay 
5220bdbc534SSatish Balay         /* Look up into the Hash Table */
523c2760754SSatish Balay         key = row*Nbs+col+1;
524c2760754SSatish Balay         h1  = HASH(size,key,tmp);
5250bdbc534SSatish Balay 
526c2760754SSatish Balay         idx = h1;
527187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
528187ce0cbSSatish Balay         total_ct++;
529187ce0cbSSatish Balay         insert_ct++;
530187ce0cbSSatish Balay        if (HT[idx] != key) {
531187ce0cbSSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
532187ce0cbSSatish Balay           if (idx == size) {
533187ce0cbSSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
534187ce0cbSSatish Balay             if (idx == h1) {
535187ce0cbSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
536187ce0cbSSatish Balay             }
537187ce0cbSSatish Balay           }
538187ce0cbSSatish Balay         }
539187ce0cbSSatish Balay #else
540c2760754SSatish Balay         if (HT[idx] != key) {
541c2760754SSatish Balay           for ( idx=h1; (idx<size) && (HT[idx]!=key); idx++);
542c2760754SSatish Balay           if (idx == size) {
543c2760754SSatish Balay             for ( idx=0; (idx<h1) && (HT[idx]!=key); idx++);
544c2760754SSatish Balay             if (idx == h1) {
545c2760754SSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
546c2760754SSatish Balay             }
547c2760754SSatish Balay           }
548c2760754SSatish Balay         }
549187ce0cbSSatish Balay #endif
550c2760754SSatish Balay         baij_a = HD[idx];
5510bdbc534SSatish Balay         if (roworiented) {
552c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
553187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
554187ce0cbSSatish Balay           value = v_t;
555187ce0cbSSatish Balay           v_t  += bs;
556fef45726SSatish Balay           if (addv == ADD_VALUES) {
557c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
558c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
559fef45726SSatish Balay                 baij_a[jj]  += *value++;
560b4cc0f5aSSatish Balay               }
561b4cc0f5aSSatish Balay             }
562fef45726SSatish Balay           } else {
563c2760754SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval) {
564c2760754SSatish Balay               for ( jj=ii; jj<bs2; jj+=bs ) {
565fef45726SSatish Balay                 baij_a[jj]  = *value++;
566fef45726SSatish Balay               }
567fef45726SSatish Balay             }
568fef45726SSatish Balay           }
5690bdbc534SSatish Balay         } else {
5700bdbc534SSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
571fef45726SSatish Balay           if (addv == ADD_VALUES) {
572b4cc0f5aSSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
5730bdbc534SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
574fef45726SSatish Balay                 baij_a[jj]  += *value++;
575fef45726SSatish Balay               }
576fef45726SSatish Balay             }
577fef45726SSatish Balay           } else {
578fef45726SSatish Balay             for ( ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs ) {
579fef45726SSatish Balay               for ( jj=0; jj<bs; jj++ ) {
580fef45726SSatish Balay                 baij_a[jj]  = *value++;
581fef45726SSatish Balay               }
582b4cc0f5aSSatish Balay             }
5830bdbc534SSatish Balay           }
5840bdbc534SSatish Balay         }
5850bdbc534SSatish Balay       }
5860bdbc534SSatish Balay     } else {
5870bdbc534SSatish Balay       if (!baij->donotstash) {
5880bdbc534SSatish Balay         if (roworiented ) {
5890bdbc534SSatish Balay           row   = im[i]*bs;
5900bdbc534SSatish Balay           value = v + i*(stepval+bs)*bs;
5910bdbc534SSatish Balay           for ( j=0; j<bs; j++,row++ ) {
5920bdbc534SSatish Balay             for ( k=0; k<n; k++ ) {
5930bdbc534SSatish Balay               for ( col=in[k]*bs,l=0; l<bs; l++,col++) {
5940bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
5950bdbc534SSatish Balay               }
5960bdbc534SSatish Balay             }
5970bdbc534SSatish Balay           }
5980bdbc534SSatish Balay         } else {
5990bdbc534SSatish Balay           for ( j=0; j<n; j++ ) {
6000bdbc534SSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
6010bdbc534SSatish Balay             col   = in[j]*bs;
6020bdbc534SSatish Balay             for ( k=0; k<bs; k++,col++,value+=stepval) {
6030bdbc534SSatish Balay               for ( row = im[i]*bs,l=0; l<bs; l++,row++) {
6040bdbc534SSatish Balay                 ierr = StashValues_Private(&baij->stash,row,1,&col,value++,addv);CHKERRQ(ierr);
6050bdbc534SSatish Balay               }
6060bdbc534SSatish Balay             }
6070bdbc534SSatish Balay           }
6080bdbc534SSatish Balay         }
6090bdbc534SSatish Balay       }
6100bdbc534SSatish Balay     }
6110bdbc534SSatish Balay   }
612187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
613187ce0cbSSatish Balay   baij->ht_total_ct = total_ct;
614187ce0cbSSatish Balay   baij->ht_insert_ct = insert_ct;
615187ce0cbSSatish Balay #endif
6160bdbc534SSatish Balay   PetscFunctionReturn(0);
6170bdbc534SSatish Balay }
618133cdb44SSatish Balay 
6190bdbc534SSatish Balay #undef __FUNC__
6205615d1e5SSatish Balay #define __FUNC__ "MatGetValues_MPIBAIJ"
621ec1ea8d8SLois Curfman McInnes int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
622d6de1c52SSatish Balay {
623d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
624d6de1c52SSatish Balay   int        bs=baij->bs,ierr,i,j, bsrstart = baij->rstart*bs, bsrend = baij->rend*bs;
62548e59246SSatish Balay   int        bscstart = baij->cstart*bs, bscend = baij->cend*bs,row,col,data;
626d6de1c52SSatish Balay 
627133cdb44SSatish Balay   PetscFunctionBegin;
628d6de1c52SSatish Balay   for ( i=0; i<m; i++ ) {
629a8c6a408SBarry Smith     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
630a8c6a408SBarry Smith     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
631d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
632d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
633d6de1c52SSatish Balay       for ( j=0; j<n; j++ ) {
634a8c6a408SBarry Smith         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
635a8c6a408SBarry Smith         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
636d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
637d6de1c52SSatish Balay           col = idxn[j] - bscstart;
63898dd23e9SBarry Smith           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
639d64ed03dSBarry Smith         } else {
640905e6a2fSBarry Smith           if (!baij->colmap) {
641905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
642905e6a2fSBarry Smith           }
64348e59246SSatish Balay #if defined (USE_CTABLE)
644fa46199cSSatish Balay           ierr = TableFind(baij->colmap,idxn[j]/bs+1,&data); CHKERRQ(ierr);
645fa46199cSSatish Balay           data --;
64648e59246SSatish Balay #else
64748e59246SSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
64848e59246SSatish Balay #endif
64948e59246SSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
650d9d09a02SSatish Balay           else {
65148e59246SSatish Balay             col  = data + idxn[j]%bs;
65298dd23e9SBarry Smith             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
653d6de1c52SSatish Balay           }
654d6de1c52SSatish Balay         }
655d6de1c52SSatish Balay       }
656d64ed03dSBarry Smith     } else {
657a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
658d6de1c52SSatish Balay     }
659d6de1c52SSatish Balay   }
6603a40ed3dSBarry Smith  PetscFunctionReturn(0);
661d6de1c52SSatish Balay }
662d6de1c52SSatish Balay 
6635615d1e5SSatish Balay #undef __FUNC__
6645615d1e5SSatish Balay #define __FUNC__ "MatNorm_MPIBAIJ"
665ec1ea8d8SLois Curfman McInnes int MatNorm_MPIBAIJ(Mat mat,NormType type,double *norm)
666d6de1c52SSatish Balay {
667d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
668d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*) baij->A->data, *bmat = (Mat_SeqBAIJ*) baij->B->data;
669acdf5bf4SSatish Balay   int        ierr, i,bs2=baij->bs2;
670d6de1c52SSatish Balay   double     sum = 0.0;
671d6de1c52SSatish Balay   Scalar     *v;
672d6de1c52SSatish Balay 
673d64ed03dSBarry Smith   PetscFunctionBegin;
674d6de1c52SSatish Balay   if (baij->size == 1) {
675d6de1c52SSatish Balay     ierr =  MatNorm(baij->A,type,norm); CHKERRQ(ierr);
676d6de1c52SSatish Balay   } else {
677d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
678d6de1c52SSatish Balay       v = amat->a;
679d6de1c52SSatish Balay       for (i=0; i<amat->nz*bs2; i++ ) {
6803a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
681e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
682d6de1c52SSatish Balay #else
683d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
684d6de1c52SSatish Balay #endif
685d6de1c52SSatish Balay       }
686d6de1c52SSatish Balay       v = bmat->a;
687d6de1c52SSatish Balay       for (i=0; i<bmat->nz*bs2; i++ ) {
6883a40ed3dSBarry Smith #if defined(USE_PETSC_COMPLEX)
689e20fef11SSatish Balay         sum += PetscReal(PetscConj(*v)*(*v)); v++;
690d6de1c52SSatish Balay #else
691d6de1c52SSatish Balay         sum += (*v)*(*v); v++;
692d6de1c52SSatish Balay #endif
693d6de1c52SSatish Balay       }
694ca161407SBarry Smith       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
695d6de1c52SSatish Balay       *norm = sqrt(*norm);
696d64ed03dSBarry Smith     } else {
697e3372554SBarry Smith       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
698d6de1c52SSatish Balay     }
699d64ed03dSBarry Smith   }
7003a40ed3dSBarry Smith   PetscFunctionReturn(0);
701d6de1c52SSatish Balay }
70257b952d6SSatish Balay 
703bd7f49f5SSatish Balay 
704fef45726SSatish Balay /*
705fef45726SSatish Balay   Creates the hash table, and sets the table
706fef45726SSatish Balay   This table is created only once.
707fef45726SSatish Balay   If new entried need to be added to the matrix
708fef45726SSatish Balay   then the hash table has to be destroyed and
709fef45726SSatish Balay   recreated.
710fef45726SSatish Balay */
711fef45726SSatish Balay #undef __FUNC__
712fef45726SSatish Balay #define __FUNC__ "MatCreateHashTable_MPIBAIJ_Private"
713d0a41580SSatish Balay int MatCreateHashTable_MPIBAIJ_Private(Mat mat,double factor)
714596b8d2eSBarry Smith {
715596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
716596b8d2eSBarry Smith   Mat         A = baij->A, B=baij->B;
717596b8d2eSBarry Smith   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data, *b=(Mat_SeqBAIJ *)B->data;
7180bdbc534SSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
7194a15367fSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart;
720187ce0cbSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
721fef45726SSatish Balay   int         *HT,key;
7220bdbc534SSatish Balay   Scalar      **HD;
723c2760754SSatish Balay   double      tmp;
7244a15367fSSatish Balay #if defined(USE_PETSC_BOPT_g)
7254a15367fSSatish Balay   int         ct=0,max=0;
7264a15367fSSatish Balay #endif
727fef45726SSatish Balay 
728d64ed03dSBarry Smith   PetscFunctionBegin;
7290bdbc534SSatish Balay   baij->ht_size=(int)(factor*nz);
7300bdbc534SSatish Balay   size = baij->ht_size;
731fef45726SSatish Balay 
7320bdbc534SSatish Balay   if (baij->ht) {
7330bdbc534SSatish Balay     PetscFunctionReturn(0);
734596b8d2eSBarry Smith   }
7350bdbc534SSatish Balay 
736fef45726SSatish Balay   /* Allocate Memory for Hash Table */
737b9e4cc15SSatish Balay   baij->hd = (Scalar**)PetscMalloc((size)*(sizeof(int)+sizeof(Scalar*))+1); CHKPTRQ(baij->hd);
738b9e4cc15SSatish Balay   baij->ht = (int*)(baij->hd + size);
739b9e4cc15SSatish Balay   HD = baij->hd;
740a07cd24cSSatish Balay   HT = baij->ht;
741b9e4cc15SSatish Balay 
742b9e4cc15SSatish Balay 
743c2760754SSatish Balay   PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));
7440bdbc534SSatish Balay 
745596b8d2eSBarry Smith 
746596b8d2eSBarry Smith   /* Loop Over A */
7470bdbc534SSatish Balay   for ( i=0; i<a->mbs; i++ ) {
748596b8d2eSBarry Smith     for ( j=ai[i]; j<ai[i+1]; j++ ) {
7490bdbc534SSatish Balay       row = i+rstart;
7500bdbc534SSatish Balay       col = aj[j]+cstart;
751596b8d2eSBarry Smith 
752187ce0cbSSatish Balay       key = row*Nbs + col + 1;
753c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7540bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7550bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7560bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7570bdbc534SSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
758596b8d2eSBarry Smith           break;
759187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
760187ce0cbSSatish Balay         } else {
761187ce0cbSSatish Balay           ct++;
762187ce0cbSSatish Balay #endif
763596b8d2eSBarry Smith         }
764187ce0cbSSatish Balay       }
765187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
766187ce0cbSSatish Balay       if (k> max) max = k;
767187ce0cbSSatish Balay #endif
768596b8d2eSBarry Smith     }
769596b8d2eSBarry Smith   }
770596b8d2eSBarry Smith   /* Loop Over B */
7710bdbc534SSatish Balay   for ( i=0; i<b->mbs; i++ ) {
772596b8d2eSBarry Smith     for ( j=bi[i]; j<bi[i+1]; j++ ) {
7730bdbc534SSatish Balay       row = i+rstart;
7740bdbc534SSatish Balay       col = garray[bj[j]];
775187ce0cbSSatish Balay       key = row*Nbs + col + 1;
776c2760754SSatish Balay       h1  = HASH(size,key,tmp);
7770bdbc534SSatish Balay       for ( k=0; k<size; k++ ){
7780bdbc534SSatish Balay         if (HT[(h1+k)%size] == 0.0) {
7790bdbc534SSatish Balay           HT[(h1+k)%size] = key;
7800bdbc534SSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
781596b8d2eSBarry Smith           break;
782187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
783187ce0cbSSatish Balay         } else {
784187ce0cbSSatish Balay           ct++;
785187ce0cbSSatish Balay #endif
786596b8d2eSBarry Smith         }
787187ce0cbSSatish Balay       }
788187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
789187ce0cbSSatish Balay       if (k> max) max = k;
790187ce0cbSSatish Balay #endif
791596b8d2eSBarry Smith     }
792596b8d2eSBarry Smith   }
793596b8d2eSBarry Smith 
794596b8d2eSBarry Smith   /* Print Summary */
795187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
796c2760754SSatish Balay   for ( i=0,j=0; i<size; i++)
797596b8d2eSBarry Smith     if (HT[i]) {j++;}
798187ce0cbSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",
799187ce0cbSSatish Balay            (j== 0)? 0.0:((double)(ct+j))/j,max);
800187ce0cbSSatish Balay #endif
8013a40ed3dSBarry Smith   PetscFunctionReturn(0);
802596b8d2eSBarry Smith }
80357b952d6SSatish Balay 
804*bbb85fb3SSatish Balay #if defined (__JUNK__)
805*bbb85fb3SSatish Balay #undef __FUNC__
806*bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
807*bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
808*bbb85fb3SSatish Balay {
809*bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
810*bbb85fb3SSatish Balay   MPI_Comm    comm = mat->comm;
811*bbb85fb3SSatish Balay   int         size = baij->size, *owners = baij->rowners,bs=baij->bs;
812*bbb85fb3SSatish Balay   int         rank = baij->rank,tag = mat->tag, *owner,*starts,count,ierr;
813*bbb85fb3SSatish Balay   MPI_Request *send_waits,*recv_waits;
814*bbb85fb3SSatish Balay   int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
815*bbb85fb3SSatish Balay   InsertMode  addv;
816*bbb85fb3SSatish Balay   Scalar      *rvalues,*svalues;
817*bbb85fb3SSatish Balay 
818*bbb85fb3SSatish Balay   PetscFunctionBegin;
819*bbb85fb3SSatish Balay   if (baij->donotstash) {
820*bbb85fb3SSatish Balay     baij->svalues    = 0; baij->rvalues    = 0;
821*bbb85fb3SSatish Balay     baij->nsends     = 0; baij->nrecvs     = 0;
822*bbb85fb3SSatish Balay     baij->send_waits = 0; baij->recv_waits = 0;
823*bbb85fb3SSatish Balay     baij->rmax       = 0;
824*bbb85fb3SSatish Balay     PetscFunctionReturn(0);
825*bbb85fb3SSatish Balay   }
826*bbb85fb3SSatish Balay 
827*bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
828*bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
829*bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
830*bbb85fb3SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
831*bbb85fb3SSatish Balay   }
832*bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
833*bbb85fb3SSatish Balay 
834*bbb85fb3SSatish Balay   /*  first count number of contributors to each processor */
835*bbb85fb3SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
836*bbb85fb3SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
837*bbb85fb3SSatish Balay   owner = (int *) PetscMalloc( (baij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
838*bbb85fb3SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
839*bbb85fb3SSatish Balay     idx = baij->stash.idx[i];
840*bbb85fb3SSatish Balay     for ( j=0; j<size; j++ ) {
841*bbb85fb3SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
842*bbb85fb3SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; break;
843*bbb85fb3SSatish Balay       }
844*bbb85fb3SSatish Balay     }
845*bbb85fb3SSatish Balay   }
846*bbb85fb3SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
847*bbb85fb3SSatish Balay 
848*bbb85fb3SSatish Balay   /* inform other processors of number of messages and max length*/
849*bbb85fb3SSatish Balay   work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
850*bbb85fb3SSatish Balay   ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
851*bbb85fb3SSatish Balay   nreceives = work[rank];
852*bbb85fb3SSatish Balay   ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
853*bbb85fb3SSatish Balay   nmax      = work[rank];
854*bbb85fb3SSatish Balay   PetscFree(work);
855*bbb85fb3SSatish Balay 
856*bbb85fb3SSatish Balay   /* post receives:
857*bbb85fb3SSatish Balay        1) each message will consist of ordered pairs
858*bbb85fb3SSatish Balay      (global index,value) we store the global index as a double
859*bbb85fb3SSatish Balay      to simplify the message passing.
860*bbb85fb3SSatish Balay        2) since we don't know how long each individual message is we
861*bbb85fb3SSatish Balay      allocate the largest needed buffer for each receive. Potentially
862*bbb85fb3SSatish Balay      this is a lot of wasted space.
863*bbb85fb3SSatish Balay 
864*bbb85fb3SSatish Balay 
865*bbb85fb3SSatish Balay        This could be done better.
866*bbb85fb3SSatish Balay   */
867*bbb85fb3SSatish Balay   rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
868*bbb85fb3SSatish Balay   recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
869*bbb85fb3SSatish Balay   for ( i=0; i<nreceives; i++ ) {
870*bbb85fb3SSatish Balay     ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
871*bbb85fb3SSatish Balay   }
872*bbb85fb3SSatish Balay 
873*bbb85fb3SSatish Balay   /* do sends:
874*bbb85fb3SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
875*bbb85fb3SSatish Balay          the ith processor
876*bbb85fb3SSatish Balay   */
877*bbb85fb3SSatish Balay   svalues    = (Scalar *) PetscMalloc(3*(baij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
878*bbb85fb3SSatish Balay   send_waits = (MPI_Request *) PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
879*bbb85fb3SSatish Balay   starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
880*bbb85fb3SSatish Balay   starts[0] = 0;
881*bbb85fb3SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
882*bbb85fb3SSatish Balay   for ( i=0; i<baij->stash.n; i++ ) {
883*bbb85fb3SSatish Balay     svalues[3*starts[owner[i]]]       = (Scalar)  baij->stash.idx[i];
884*bbb85fb3SSatish Balay     svalues[3*starts[owner[i]]+1]     = (Scalar)  baij->stash.idy[i];
885*bbb85fb3SSatish Balay     svalues[3*(starts[owner[i]]++)+2] =  baij->stash.array[i];
886*bbb85fb3SSatish Balay   }
887*bbb85fb3SSatish Balay   PetscFree(owner);
888*bbb85fb3SSatish Balay   starts[0] = 0;
889*bbb85fb3SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
890*bbb85fb3SSatish Balay   count = 0;
891*bbb85fb3SSatish Balay   for ( i=0; i<size; i++ ) {
892*bbb85fb3SSatish Balay     if (procs[i]) {
893*bbb85fb3SSatish Balay       ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
894*bbb85fb3SSatish Balay     }
895*bbb85fb3SSatish Balay   }
896*bbb85fb3SSatish Balay   PetscFree(starts); PetscFree(nprocs);
897*bbb85fb3SSatish Balay 
898*bbb85fb3SSatish Balay   /* Free cache space */
899*bbb85fb3SSatish Balay   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
900*bbb85fb3SSatish Balay   ierr = StashReset_Private(&baij->stash); CHKERRQ(ierr);
901*bbb85fb3SSatish Balay 
902*bbb85fb3SSatish Balay   baij->svalues    = svalues;    baij->rvalues    = rvalues;
903*bbb85fb3SSatish Balay   baij->nsends     = nsends;     baij->nrecvs     = nreceives;
904*bbb85fb3SSatish Balay   baij->send_waits = send_waits; baij->recv_waits = recv_waits;
905*bbb85fb3SSatish Balay   baij->rmax       = nmax;
906*bbb85fb3SSatish Balay 
907*bbb85fb3SSatish Balay   PetscFunctionReturn(0);
908*bbb85fb3SSatish Balay }
9094a69d603SSatish Balay 
9105615d1e5SSatish Balay #undef __FUNC__
9115615d1e5SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
912ec1ea8d8SLois Curfman McInnes int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
91357b952d6SSatish Balay {
91457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
91557b952d6SSatish Balay   MPI_Status  *send_status,recv_status;
916*bbb85fb3SSatish Balay   int         imdex,count = nrecvs, i, n, ierr;
917b7029e64SSatish Balay   int         bs=baij->bs,row,col,other_disassembled;
918*bbb85fb3SSatish Balay   Scalar      val;
919e0fa3b82SLois Curfman McInnes   InsertMode  addv = mat->insertmode;
92057b952d6SSatish Balay 
921d64ed03dSBarry Smith   PetscFunctionBegin;
92257b952d6SSatish Balay   /*  wait on receives */
92357b952d6SSatish Balay   while (count) {
924ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,baij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
92557b952d6SSatish Balay     /* unpack receives into our local space */
92657b952d6SSatish Balay     values = baij->rvalues + 3*imdex*baij->rmax;
927ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
92857b952d6SSatish Balay     n = n/3;
92957b952d6SSatish Balay     for ( i=0; i<n; i++ ) {
93057b952d6SSatish Balay       row = (int) PetscReal(values[3*i]) - baij->rstart*bs;
93157b952d6SSatish Balay       col = (int) PetscReal(values[3*i+1]);
93257b952d6SSatish Balay       val = values[3*i+2];
93357b952d6SSatish Balay       if (col >= baij->cstart*bs && col < baij->cend*bs) {
93457b952d6SSatish Balay         col -= baij->cstart*bs;
9354a69d603SSatish Balay         ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
936d64ed03dSBarry Smith       } else {
93757b952d6SSatish Balay         if (mat->was_assembled) {
938905e6a2fSBarry Smith           if (!baij->colmap) {
939905e6a2fSBarry Smith             ierr = CreateColmap_MPIBAIJ_Private(mat); CHKERRQ(ierr);
940905e6a2fSBarry Smith           }
94148e59246SSatish Balay #if defined (USE_CTABLE)
942fa46199cSSatish Balay 	  ierr = TableFind(baij->colmap,col/bs+1,&col); CHKERRQ(ierr);
943fa46199cSSatish Balay           col  = col - 1 + col%bs;
94448e59246SSatish Balay #else
945a5eb4965SSatish Balay           col = (baij->colmap[col/bs]) - 1 + col%bs;
94648e59246SSatish Balay #endif
94757b952d6SSatish Balay           if (col < 0  && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
94857b952d6SSatish Balay             ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
94957b952d6SSatish Balay             col = (int) PetscReal(values[3*i+1]);
95057b952d6SSatish Balay           }
95157b952d6SSatish Balay         }
9524a69d603SSatish Balay         ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr)
95357b952d6SSatish Balay       }
95457b952d6SSatish Balay     }
95557b952d6SSatish Balay     count--;
95657b952d6SSatish Balay   }
957570da906SBarry Smith   if (baij->recv_waits) PetscFree(baij->recv_waits);
958570da906SBarry Smith   if (baij->rvalues)    PetscFree(baij->rvalues);
95957b952d6SSatish Balay 
96057b952d6SSatish Balay   /* wait on sends */
96157b952d6SSatish Balay   if (baij->nsends) {
962d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(baij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
963ca161407SBarry Smith     ierr        = MPI_Waitall(baij->nsends,baij->send_waits,send_status);CHKERRQ(ierr);
96457b952d6SSatish Balay     PetscFree(send_status);
96557b952d6SSatish Balay   }
966570da906SBarry Smith   if (baij->send_waits) PetscFree(baij->send_waits);
967570da906SBarry Smith   if (baij->svalues)    PetscFree(baij->svalues);
96857b952d6SSatish Balay 
96957b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
97057b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
97157b952d6SSatish Balay 
97257b952d6SSatish Balay   /* determine if any processor has disassembled, if so we must
97357b952d6SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
9746e713f22SBarry Smith   /*
9756e713f22SBarry Smith      if nonzero structure of submatrix B cannot change then we know that
9766e713f22SBarry Smith      no processor disassembled thus we can skip this stuff
9776e713f22SBarry Smith   */
9786e713f22SBarry Smith   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
979ca161407SBarry Smith     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
98057b952d6SSatish Balay     if (mat->was_assembled && !other_disassembled) {
98157b952d6SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
98257b952d6SSatish Balay     }
9836e713f22SBarry Smith   }
98457b952d6SSatish Balay 
9856d4a8577SBarry Smith   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
98657b952d6SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
98757b952d6SSatish Balay   }
98857b952d6SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
98957b952d6SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
99057b952d6SSatish Balay 
991187ce0cbSSatish Balay #if defined(USE_PETSC_BOPT_g)
992187ce0cbSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
993187ce0cbSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
994187ce0cbSSatish Balay              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
995187ce0cbSSatish Balay     baij->ht_total_ct  = 0;
996187ce0cbSSatish Balay     baij->ht_insert_ct = 0;
997187ce0cbSSatish Balay   }
998187ce0cbSSatish Balay #endif
999133cdb44SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1000133cdb44SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
1001f830108cSBarry Smith     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1002f830108cSBarry Smith     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1003bd7f49f5SSatish Balay   }
1004187ce0cbSSatish Balay 
100557b952d6SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
10063a40ed3dSBarry Smith   PetscFunctionReturn(0);
100757b952d6SSatish Balay }
1008*bbb85fb3SSatish Balay #endif
1009*bbb85fb3SSatish Balay #undef __FUNC__
1010*bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyBegin_MPIBAIJ"
1011*bbb85fb3SSatish Balay int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
1012*bbb85fb3SSatish Balay {
1013*bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1014*bbb85fb3SSatish Balay   int         ierr;
1015*bbb85fb3SSatish Balay   InsertMode  addv;
1016*bbb85fb3SSatish Balay 
1017*bbb85fb3SSatish Balay   PetscFunctionBegin;
1018*bbb85fb3SSatish Balay   if (baij->donotstash) {
1019*bbb85fb3SSatish Balay     PetscFunctionReturn(0);
1020*bbb85fb3SSatish Balay   }
1021*bbb85fb3SSatish Balay 
1022*bbb85fb3SSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
1023*bbb85fb3SSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
1024*bbb85fb3SSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
1025*bbb85fb3SSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
1026*bbb85fb3SSatish Balay   }
1027*bbb85fb3SSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
1028*bbb85fb3SSatish Balay 
1029*bbb85fb3SSatish Balay   ierr =  StashScatterBegin_Private(&baij->stash,baij->rowners); CHKERRQ(ierr);
1030*bbb85fb3SSatish Balay 
1031*bbb85fb3SSatish Balay   /* Free cache space */
1032*bbb85fb3SSatish Balay   PLogInfo(baij->A,"MatAssemblyBegin_MPIBAIJ:Number of off-processor values %d\n",baij->stash.n);
1033*bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1034*bbb85fb3SSatish Balay }
1035*bbb85fb3SSatish Balay 
1036*bbb85fb3SSatish Balay #undef __FUNC__
1037*bbb85fb3SSatish Balay #define __FUNC__ "MatAssemblyEnd_MPIBAIJ"
1038*bbb85fb3SSatish Balay int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
1039*bbb85fb3SSatish Balay {
1040*bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1041*bbb85fb3SSatish Balay   int         imdex,i,j,rstart,ncols,n,ierr;
1042*bbb85fb3SSatish Balay   int         *row,*col,other_disassembled;
1043*bbb85fb3SSatish Balay   Scalar      *val;
1044*bbb85fb3SSatish Balay   InsertMode  addv = mat->insertmode;
1045*bbb85fb3SSatish Balay 
1046*bbb85fb3SSatish Balay   PetscFunctionBegin;
1047*bbb85fb3SSatish Balay   if (!baij->donotstash) {
1048*bbb85fb3SSatish Balay     ierr = StashScatterEnd_Private(&baij->stash); CHKERRQ(ierr);
1049*bbb85fb3SSatish Balay     for (imdex=0; imdex<baij->stash.nrecvs; imdex++ ) {
1050*bbb85fb3SSatish Balay       n   = baij->stash.rdata[imdex].n;
1051*bbb85fb3SSatish Balay       row = baij->stash.rdata[imdex].i;
1052*bbb85fb3SSatish Balay       col = baij->stash.rdata[imdex].j;
1053*bbb85fb3SSatish Balay       val = baij->stash.rdata[imdex].a;
1054*bbb85fb3SSatish Balay       for ( i=0; i<n; ) {
1055*bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1056*bbb85fb3SSatish Balay         for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
1057*bbb85fb3SSatish Balay         if (j < n) ncols = j-i;
1058*bbb85fb3SSatish Balay         else       ncols = n-i;
1059*bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
1060*bbb85fb3SSatish Balay         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv); CHKERRQ(ierr);
1061*bbb85fb3SSatish Balay         i = j;
1062*bbb85fb3SSatish Balay       }
1063*bbb85fb3SSatish Balay     }
1064*bbb85fb3SSatish Balay   ierr = StashReset_Private(&baij->stash); CHKERRQ(ierr);
1065*bbb85fb3SSatish Balay   }
1066*bbb85fb3SSatish Balay 
1067*bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->A,mode); CHKERRQ(ierr);
1068*bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->A,mode); CHKERRQ(ierr);
1069*bbb85fb3SSatish Balay 
1070*bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
1071*bbb85fb3SSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1072*bbb85fb3SSatish Balay   /*
1073*bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1074*bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
1075*bbb85fb3SSatish Balay   */
1076*bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ*) baij->B->data)->nonew)  {
1077*bbb85fb3SSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1078*bbb85fb3SSatish Balay     if (mat->was_assembled && !other_disassembled) {
1079*bbb85fb3SSatish Balay       ierr = DisAssemble_MPIBAIJ(mat); CHKERRQ(ierr);
1080*bbb85fb3SSatish Balay     }
1081*bbb85fb3SSatish Balay   }
1082*bbb85fb3SSatish Balay 
1083*bbb85fb3SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1084*bbb85fb3SSatish Balay     ierr = MatSetUpMultiply_MPIBAIJ(mat); CHKERRQ(ierr);
1085*bbb85fb3SSatish Balay   }
1086*bbb85fb3SSatish Balay   ierr = MatAssemblyBegin(baij->B,mode); CHKERRQ(ierr);
1087*bbb85fb3SSatish Balay   ierr = MatAssemblyEnd(baij->B,mode); CHKERRQ(ierr);
1088*bbb85fb3SSatish Balay 
1089*bbb85fb3SSatish Balay #if defined(USE_PETSC_BOPT_g)
1090*bbb85fb3SSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1091*bbb85fb3SSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",
1092*bbb85fb3SSatish Balay              ((double)baij->ht_total_ct)/baij->ht_insert_ct);
1093*bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
1094*bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
1095*bbb85fb3SSatish Balay   }
1096*bbb85fb3SSatish Balay #endif
1097*bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1098*bbb85fb3SSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact); CHKERRQ(ierr);
1099*bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1100*bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1101*bbb85fb3SSatish Balay   }
1102*bbb85fb3SSatish Balay 
1103*bbb85fb3SSatish Balay   if (baij->rowvalues) {PetscFree(baij->rowvalues); baij->rowvalues = 0;}
1104*bbb85fb3SSatish Balay   PetscFunctionReturn(0);
1105*bbb85fb3SSatish Balay }
110657b952d6SSatish Balay 
11075615d1e5SSatish Balay #undef __FUNC__
11085615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ_Binary"
110957b952d6SSatish Balay static int MatView_MPIBAIJ_Binary(Mat mat,Viewer viewer)
111057b952d6SSatish Balay {
111157b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
111257b952d6SSatish Balay   int          ierr;
111357b952d6SSatish Balay 
1114d64ed03dSBarry Smith   PetscFunctionBegin;
111557b952d6SSatish Balay   if (baij->size == 1) {
111657b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1117a8c6a408SBarry Smith   } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
11183a40ed3dSBarry Smith   PetscFunctionReturn(0);
111957b952d6SSatish Balay }
112057b952d6SSatish Balay 
11215615d1e5SSatish Balay #undef __FUNC__
11227b2a1423SBarry Smith #define __FUNC__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
11237b2a1423SBarry Smith static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
112457b952d6SSatish Balay {
112557b952d6SSatish Balay   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ *) mat->data;
112677ed5343SBarry Smith   int          ierr, format,bs = baij->bs, size = baij->size, rank = baij->rank;
112757b952d6SSatish Balay   FILE         *fd;
112857b952d6SSatish Balay   ViewerType   vtype;
112957b952d6SSatish Balay 
1130d64ed03dSBarry Smith   PetscFunctionBegin;
113157b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
11323f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
1133d41123aaSBarry Smith     ierr = ViewerGetFormat(viewer,&format); CHKERRQ(ierr);
1134639f9d9dSBarry Smith     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
11354e220ebcSLois Curfman McInnes       MatInfo info;
113657b952d6SSatish Balay       MPI_Comm_rank(mat->comm,&rank);
113757b952d6SSatish Balay       ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
1138d41123aaSBarry Smith       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
113957b952d6SSatish Balay       PetscSequentialPhaseBegin(mat->comm,1);
114057b952d6SSatish Balay       fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
11414e220ebcSLois Curfman McInnes               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
11424e220ebcSLois Curfman McInnes               baij->bs,(int)info.memory);
11434e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);
11444e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
11454e220ebcSLois Curfman McInnes       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);
11464e220ebcSLois Curfman McInnes       fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
114757b952d6SSatish Balay       fflush(fd);
114857b952d6SSatish Balay       PetscSequentialPhaseEnd(mat->comm,1);
114957b952d6SSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer); CHKERRQ(ierr);
11503a40ed3dSBarry Smith       PetscFunctionReturn(0);
1151d64ed03dSBarry Smith     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1152bcc3fcf6SBarry Smith       PetscPrintf(mat->comm,"  block size is %d\n",bs);
11533a40ed3dSBarry Smith       PetscFunctionReturn(0);
115457b952d6SSatish Balay     }
115557b952d6SSatish Balay   }
115657b952d6SSatish Balay 
11573f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
115857b952d6SSatish Balay     Draw       draw;
115957b952d6SSatish Balay     PetscTruth isnull;
116077ed5343SBarry Smith     ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr);
11613a40ed3dSBarry Smith     ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
116257b952d6SSatish Balay   }
116357b952d6SSatish Balay 
116457b952d6SSatish Balay   if (size == 1) {
116557b952d6SSatish Balay     ierr = MatView(baij->A,viewer); CHKERRQ(ierr);
1166d64ed03dSBarry Smith   } else {
116757b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
116857b952d6SSatish Balay     Mat         A;
116957b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
117040011551SBarry Smith     int         M = baij->M, N = baij->N,*ai,*aj,col,i,j,k,*rvals;
117157b952d6SSatish Balay     int         mbs = baij->mbs;
117257b952d6SSatish Balay     Scalar      *a;
117357b952d6SSatish Balay 
117457b952d6SSatish Balay     if (!rank) {
117555843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1176d64ed03dSBarry Smith     } else {
117755843e3eSBarry Smith       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
117857b952d6SSatish Balay     }
117957b952d6SSatish Balay     PLogObjectParent(mat,A);
118057b952d6SSatish Balay 
118157b952d6SSatish Balay     /* copy over the A part */
118257b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->A->data;
118357b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
118457b952d6SSatish Balay     rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
118557b952d6SSatish Balay 
118657b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
118757b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
118857b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
118957b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
119057b952d6SSatish Balay         col = (baij->cstart+aj[j])*bs;
119157b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1192cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1193cee3aa6bSSatish Balay           col++; a += bs;
119457b952d6SSatish Balay         }
119557b952d6SSatish Balay       }
119657b952d6SSatish Balay     }
119757b952d6SSatish Balay     /* copy over the B part */
119857b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ*) baij->B->data;
119957b952d6SSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
120057b952d6SSatish Balay     for ( i=0; i<mbs; i++ ) {
120157b952d6SSatish Balay       rvals[0] = bs*(baij->rstart + i);
120257b952d6SSatish Balay       for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
120357b952d6SSatish Balay       for ( j=ai[i]; j<ai[i+1]; j++ ) {
120457b952d6SSatish Balay         col = baij->garray[aj[j]]*bs;
120557b952d6SSatish Balay         for (k=0; k<bs; k++ ) {
1206cee3aa6bSSatish Balay           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1207cee3aa6bSSatish Balay           col++; a += bs;
120857b952d6SSatish Balay         }
120957b952d6SSatish Balay       }
121057b952d6SSatish Balay     }
121157b952d6SSatish Balay     PetscFree(rvals);
12126d4a8577SBarry Smith     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
12136d4a8577SBarry Smith     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
121455843e3eSBarry Smith     /*
121555843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
121655843e3eSBarry Smith        synchronized across all processors that share the Draw object
121755843e3eSBarry Smith     */
12183f1db9ecSBarry Smith     if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
121957b952d6SSatish Balay       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
122057b952d6SSatish Balay     }
122157b952d6SSatish Balay     ierr = MatDestroy(A); CHKERRQ(ierr);
122257b952d6SSatish Balay   }
12233a40ed3dSBarry Smith   PetscFunctionReturn(0);
122457b952d6SSatish Balay }
122557b952d6SSatish Balay 
122657b952d6SSatish Balay 
122757b952d6SSatish Balay 
12285615d1e5SSatish Balay #undef __FUNC__
12295615d1e5SSatish Balay #define __FUNC__ "MatView_MPIBAIJ"
1230e1311b90SBarry Smith int MatView_MPIBAIJ(Mat mat,Viewer viewer)
123157b952d6SSatish Balay {
123257b952d6SSatish Balay   int         ierr;
123357b952d6SSatish Balay   ViewerType  vtype;
123457b952d6SSatish Balay 
1235d64ed03dSBarry Smith   PetscFunctionBegin;
123657b952d6SSatish Balay   ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
12373f1db9ecSBarry Smith   if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) ||
12387b2a1423SBarry Smith       PetscTypeCompare(vtype,SOCKET_VIEWER)) {
12397b2a1423SBarry Smith     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer); CHKERRQ(ierr);
12403f1db9ecSBarry Smith   } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) {
12413a40ed3dSBarry Smith     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
12425cd90555SBarry Smith   } else {
12435cd90555SBarry Smith     SETERRQ(1,1,"Viewer type not supported by PETSc object");
124457b952d6SSatish Balay   }
12453a40ed3dSBarry Smith   PetscFunctionReturn(0);
124657b952d6SSatish Balay }
124757b952d6SSatish Balay 
12485615d1e5SSatish Balay #undef __FUNC__
12495615d1e5SSatish Balay #define __FUNC__ "MatDestroy_MPIBAIJ"
1250e1311b90SBarry Smith int MatDestroy_MPIBAIJ(Mat mat)
125179bdfe76SSatish Balay {
125279bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
125379bdfe76SSatish Balay   int         ierr;
125479bdfe76SSatish Balay 
1255d64ed03dSBarry Smith   PetscFunctionBegin;
125698dd23e9SBarry Smith   if (--mat->refct > 0) PetscFunctionReturn(0);
125798dd23e9SBarry Smith 
125898dd23e9SBarry Smith   if (mat->mapping) {
125998dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr);
126098dd23e9SBarry Smith   }
126198dd23e9SBarry Smith   if (mat->bmapping) {
126298dd23e9SBarry Smith     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr);
126398dd23e9SBarry Smith   }
126461b13de0SBarry Smith   if (mat->rmap) {
126561b13de0SBarry Smith     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
126661b13de0SBarry Smith   }
126761b13de0SBarry Smith   if (mat->cmap) {
126861b13de0SBarry Smith     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
126961b13de0SBarry Smith   }
12703a40ed3dSBarry Smith #if defined(USE_PETSC_LOG)
1271e1311b90SBarry Smith   PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",baij->M,baij->N);
127279bdfe76SSatish Balay #endif
127379bdfe76SSatish Balay 
127483e2fdc7SBarry Smith   ierr = StashDestroy_Private(&baij->stash); CHKERRQ(ierr);
127579bdfe76SSatish Balay   PetscFree(baij->rowners);
127679bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
127779bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
127848e59246SSatish Balay #if defined (USE_CTABLE)
127948e59246SSatish Balay   if (baij->colmap) TableDelete(baij->colmap);
128048e59246SSatish Balay #else
128179bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
128248e59246SSatish Balay #endif
128379bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
128479bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
128579bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
128679bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
128730793edcSSatish Balay   if (baij->barray) PetscFree(baij->barray);
1288b9e4cc15SSatish Balay   if (baij->hd) PetscFree(baij->hd);
128979bdfe76SSatish Balay   PetscFree(baij);
129079bdfe76SSatish Balay   PLogObjectDestroy(mat);
129179bdfe76SSatish Balay   PetscHeaderDestroy(mat);
12923a40ed3dSBarry Smith   PetscFunctionReturn(0);
129379bdfe76SSatish Balay }
129479bdfe76SSatish Balay 
12955615d1e5SSatish Balay #undef __FUNC__
12965615d1e5SSatish Balay #define __FUNC__ "MatMult_MPIBAIJ"
1297ec1ea8d8SLois Curfman McInnes int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1298cee3aa6bSSatish Balay {
1299cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
130047b4a8eaSLois Curfman McInnes   int         ierr, nt;
1301cee3aa6bSSatish Balay 
1302d64ed03dSBarry Smith   PetscFunctionBegin;
1303e1311b90SBarry Smith   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
130447b4a8eaSLois Curfman McInnes   if (nt != a->n) {
1305a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
130647b4a8eaSLois Curfman McInnes   }
1307e1311b90SBarry Smith   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
130847b4a8eaSLois Curfman McInnes   if (nt != a->m) {
1309a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
131047b4a8eaSLois Curfman McInnes   }
131143a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1312f830108cSBarry Smith   ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
131343a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1314f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
131543a90d84SBarry Smith   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
13163a40ed3dSBarry Smith   PetscFunctionReturn(0);
1317cee3aa6bSSatish Balay }
1318cee3aa6bSSatish Balay 
13195615d1e5SSatish Balay #undef __FUNC__
13205615d1e5SSatish Balay #define __FUNC__ "MatMultAdd_MPIBAIJ"
1321ec1ea8d8SLois Curfman McInnes int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1322cee3aa6bSSatish Balay {
1323cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1324cee3aa6bSSatish Balay   int        ierr;
1325d64ed03dSBarry Smith 
1326d64ed03dSBarry Smith   PetscFunctionBegin;
132743a90d84SBarry Smith   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1328f830108cSBarry Smith   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
132943a90d84SBarry Smith   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1330f830108cSBarry Smith   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
13313a40ed3dSBarry Smith   PetscFunctionReturn(0);
1332cee3aa6bSSatish Balay }
1333cee3aa6bSSatish Balay 
13345615d1e5SSatish Balay #undef __FUNC__
13355615d1e5SSatish Balay #define __FUNC__ "MatMultTrans_MPIBAIJ"
1336ec1ea8d8SLois Curfman McInnes int MatMultTrans_MPIBAIJ(Mat A,Vec xx,Vec yy)
1337cee3aa6bSSatish Balay {
1338cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1339cee3aa6bSSatish Balay   int         ierr;
1340cee3aa6bSSatish Balay 
1341d64ed03dSBarry Smith   PetscFunctionBegin;
1342cee3aa6bSSatish Balay   /* do nondiagonal part */
1343f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1344cee3aa6bSSatish Balay   /* send it on its way */
1345537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1346cee3aa6bSSatish Balay   /* do local part */
1347f830108cSBarry Smith   ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
1348cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1349cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1350cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1351639f9d9dSBarry Smith   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
13523a40ed3dSBarry Smith   PetscFunctionReturn(0);
1353cee3aa6bSSatish Balay }
1354cee3aa6bSSatish Balay 
13555615d1e5SSatish Balay #undef __FUNC__
13565615d1e5SSatish Balay #define __FUNC__ "MatMultTransAdd_MPIBAIJ"
1357ec1ea8d8SLois Curfman McInnes int MatMultTransAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1358cee3aa6bSSatish Balay {
1359cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1360cee3aa6bSSatish Balay   int         ierr;
1361cee3aa6bSSatish Balay 
1362d64ed03dSBarry Smith   PetscFunctionBegin;
1363cee3aa6bSSatish Balay   /* do nondiagonal part */
1364f830108cSBarry Smith   ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
1365cee3aa6bSSatish Balay   /* send it on its way */
1366537820f0SBarry Smith   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
1367cee3aa6bSSatish Balay   /* do local part */
1368f830108cSBarry Smith   ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
1369cee3aa6bSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1370cee3aa6bSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1371cee3aa6bSSatish Balay   /* but is not perhaps always true. */
1372537820f0SBarry Smith   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
13733a40ed3dSBarry Smith   PetscFunctionReturn(0);
1374cee3aa6bSSatish Balay }
1375cee3aa6bSSatish Balay 
1376cee3aa6bSSatish Balay /*
1377cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1378cee3aa6bSSatish Balay    diagonal block
1379cee3aa6bSSatish Balay */
13805615d1e5SSatish Balay #undef __FUNC__
13815615d1e5SSatish Balay #define __FUNC__ "MatGetDiagonal_MPIBAIJ"
1382ec1ea8d8SLois Curfman McInnes int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1383cee3aa6bSSatish Balay {
1384cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
13853a40ed3dSBarry Smith   int         ierr;
1386d64ed03dSBarry Smith 
1387d64ed03dSBarry Smith   PetscFunctionBegin;
1388a8c6a408SBarry Smith   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
13893a40ed3dSBarry Smith   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
13903a40ed3dSBarry Smith   PetscFunctionReturn(0);
1391cee3aa6bSSatish Balay }
1392cee3aa6bSSatish Balay 
13935615d1e5SSatish Balay #undef __FUNC__
13945615d1e5SSatish Balay #define __FUNC__ "MatScale_MPIBAIJ"
1395ec1ea8d8SLois Curfman McInnes int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1396cee3aa6bSSatish Balay {
1397cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
1398cee3aa6bSSatish Balay   int         ierr;
1399d64ed03dSBarry Smith 
1400d64ed03dSBarry Smith   PetscFunctionBegin;
1401cee3aa6bSSatish Balay   ierr = MatScale(aa,a->A); CHKERRQ(ierr);
1402cee3aa6bSSatish Balay   ierr = MatScale(aa,a->B); CHKERRQ(ierr);
14033a40ed3dSBarry Smith   PetscFunctionReturn(0);
1404cee3aa6bSSatish Balay }
1405026e39d0SSatish Balay 
14065615d1e5SSatish Balay #undef __FUNC__
14075615d1e5SSatish Balay #define __FUNC__ "MatGetSize_MPIBAIJ"
1408ec1ea8d8SLois Curfman McInnes int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
140957b952d6SSatish Balay {
141057b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1411d64ed03dSBarry Smith 
1412d64ed03dSBarry Smith   PetscFunctionBegin;
1413bd7f49f5SSatish Balay   if (m) *m = mat->M;
1414bd7f49f5SSatish Balay   if (n) *n = mat->N;
14153a40ed3dSBarry Smith   PetscFunctionReturn(0);
141657b952d6SSatish Balay }
141757b952d6SSatish Balay 
14185615d1e5SSatish Balay #undef __FUNC__
14195615d1e5SSatish Balay #define __FUNC__ "MatGetLocalSize_MPIBAIJ"
1420ec1ea8d8SLois Curfman McInnes int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
142157b952d6SSatish Balay {
142257b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1423d64ed03dSBarry Smith 
1424d64ed03dSBarry Smith   PetscFunctionBegin;
1425f830108cSBarry Smith   *m = mat->m; *n = mat->n;
14263a40ed3dSBarry Smith   PetscFunctionReturn(0);
142757b952d6SSatish Balay }
142857b952d6SSatish Balay 
14295615d1e5SSatish Balay #undef __FUNC__
14305615d1e5SSatish Balay #define __FUNC__ "MatGetOwnershipRange_MPIBAIJ"
1431ec1ea8d8SLois Curfman McInnes int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
143257b952d6SSatish Balay {
143357b952d6SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1434d64ed03dSBarry Smith 
1435d64ed03dSBarry Smith   PetscFunctionBegin;
143657b952d6SSatish Balay   *m = mat->rstart*mat->bs; *n = mat->rend*mat->bs;
14373a40ed3dSBarry Smith   PetscFunctionReturn(0);
143857b952d6SSatish Balay }
143957b952d6SSatish Balay 
14405615d1e5SSatish Balay #undef __FUNC__
14415615d1e5SSatish Balay #define __FUNC__ "MatGetRow_MPIBAIJ"
1442acdf5bf4SSatish Balay int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1443acdf5bf4SSatish Balay {
1444acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *) matin->data;
1445acdf5bf4SSatish Balay   Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
1446acdf5bf4SSatish Balay   int        bs = mat->bs, bs2 = mat->bs2, i, ierr, *cworkA, *cworkB, **pcA, **pcB;
1447d9d09a02SSatish Balay   int        nztot, nzA, nzB, lrow, brstart = mat->rstart*bs, brend = mat->rend*bs;
1448d9d09a02SSatish Balay   int        *cmap, *idx_p,cstart = mat->cstart;
1449acdf5bf4SSatish Balay 
1450d64ed03dSBarry Smith   PetscFunctionBegin;
1451a8c6a408SBarry Smith   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1452acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1453acdf5bf4SSatish Balay 
1454acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1455acdf5bf4SSatish Balay     /*
1456acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1457acdf5bf4SSatish Balay     */
1458acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *) mat->A->data,*Ba = (Mat_SeqBAIJ *) mat->B->data;
1459bd16c2feSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1460bd16c2feSSatish Balay     for ( i=0; i<mbs; i++ ) {
1461acdf5bf4SSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1462acdf5bf4SSatish Balay       if (max < tmp) { max = tmp; }
1463acdf5bf4SSatish Balay     }
1464acdf5bf4SSatish Balay     mat->rowvalues = (Scalar *) PetscMalloc( max*bs2*(sizeof(int)+sizeof(Scalar)));
1465acdf5bf4SSatish Balay     CHKPTRQ(mat->rowvalues);
1466acdf5bf4SSatish Balay     mat->rowindices = (int *) (mat->rowvalues + max*bs2);
1467acdf5bf4SSatish Balay   }
1468acdf5bf4SSatish Balay 
1469a8c6a408SBarry Smith   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1470d9d09a02SSatish Balay   lrow = row - brstart;
1471acdf5bf4SSatish Balay 
1472acdf5bf4SSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1473acdf5bf4SSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1474acdf5bf4SSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1475f830108cSBarry Smith   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1476f830108cSBarry Smith   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
1477acdf5bf4SSatish Balay   nztot = nzA + nzB;
1478acdf5bf4SSatish Balay 
1479acdf5bf4SSatish Balay   cmap  = mat->garray;
1480acdf5bf4SSatish Balay   if (v  || idx) {
1481acdf5bf4SSatish Balay     if (nztot) {
1482acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1483acdf5bf4SSatish Balay       int imark = -1;
1484acdf5bf4SSatish Balay       if (v) {
1485acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1486acdf5bf4SSatish Balay         for ( i=0; i<nzB; i++ ) {
1487d9d09a02SSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1488acdf5bf4SSatish Balay           else break;
1489acdf5bf4SSatish Balay         }
1490acdf5bf4SSatish Balay         imark = i;
1491acdf5bf4SSatish Balay         for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
1492acdf5bf4SSatish Balay         for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
1493acdf5bf4SSatish Balay       }
1494acdf5bf4SSatish Balay       if (idx) {
1495acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1496acdf5bf4SSatish Balay         if (imark > -1) {
1497acdf5bf4SSatish Balay           for ( i=0; i<imark; i++ ) {
1498bd16c2feSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1499acdf5bf4SSatish Balay           }
1500acdf5bf4SSatish Balay         } else {
1501acdf5bf4SSatish Balay           for ( i=0; i<nzB; i++ ) {
1502d9d09a02SSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1503d9d09a02SSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1504acdf5bf4SSatish Balay             else break;
1505acdf5bf4SSatish Balay           }
1506acdf5bf4SSatish Balay           imark = i;
1507acdf5bf4SSatish Balay         }
1508d9d09a02SSatish Balay         for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart*bs + cworkA[i];
1509d9d09a02SSatish Balay         for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1510acdf5bf4SSatish Balay       }
1511d64ed03dSBarry Smith     } else {
1512d212a18eSSatish Balay       if (idx) *idx = 0;
1513d212a18eSSatish Balay       if (v)   *v   = 0;
1514d212a18eSSatish Balay     }
1515acdf5bf4SSatish Balay   }
1516acdf5bf4SSatish Balay   *nz = nztot;
1517f830108cSBarry Smith   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
1518f830108cSBarry Smith   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
15193a40ed3dSBarry Smith   PetscFunctionReturn(0);
1520acdf5bf4SSatish Balay }
1521acdf5bf4SSatish Balay 
15225615d1e5SSatish Balay #undef __FUNC__
15235615d1e5SSatish Balay #define __FUNC__ "MatRestoreRow_MPIBAIJ"
1524acdf5bf4SSatish Balay int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1525acdf5bf4SSatish Balay {
1526acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1527d64ed03dSBarry Smith 
1528d64ed03dSBarry Smith   PetscFunctionBegin;
1529acdf5bf4SSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1530a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1531acdf5bf4SSatish Balay   }
1532acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
15333a40ed3dSBarry Smith   PetscFunctionReturn(0);
1534acdf5bf4SSatish Balay }
1535acdf5bf4SSatish Balay 
15365615d1e5SSatish Balay #undef __FUNC__
15375615d1e5SSatish Balay #define __FUNC__ "MatGetBlockSize_MPIBAIJ"
1538ec1ea8d8SLois Curfman McInnes int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
15395a838052SSatish Balay {
15405a838052SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
1541d64ed03dSBarry Smith 
1542d64ed03dSBarry Smith   PetscFunctionBegin;
15435a838052SSatish Balay   *bs = baij->bs;
15443a40ed3dSBarry Smith   PetscFunctionReturn(0);
15455a838052SSatish Balay }
15465a838052SSatish Balay 
15475615d1e5SSatish Balay #undef __FUNC__
15485615d1e5SSatish Balay #define __FUNC__ "MatZeroEntries_MPIBAIJ"
1549ec1ea8d8SLois Curfman McInnes int MatZeroEntries_MPIBAIJ(Mat A)
155058667388SSatish Balay {
155158667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data;
155258667388SSatish Balay   int         ierr;
1553d64ed03dSBarry Smith 
1554d64ed03dSBarry Smith   PetscFunctionBegin;
155558667388SSatish Balay   ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
155658667388SSatish Balay   ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
15573a40ed3dSBarry Smith   PetscFunctionReturn(0);
155858667388SSatish Balay }
15590ac07820SSatish Balay 
15605615d1e5SSatish Balay #undef __FUNC__
15615615d1e5SSatish Balay #define __FUNC__ "MatGetInfo_MPIBAIJ"
1562ec1ea8d8SLois Curfman McInnes int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
15630ac07820SSatish Balay {
15644e220ebcSLois Curfman McInnes   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) matin->data;
15654e220ebcSLois Curfman McInnes   Mat         A = a->A, B = a->B;
15667d57db60SLois Curfman McInnes   int         ierr;
15677d57db60SLois Curfman McInnes   double      isend[5], irecv[5];
15680ac07820SSatish Balay 
1569d64ed03dSBarry Smith   PetscFunctionBegin;
15704e220ebcSLois Curfman McInnes   info->block_size     = (double)a->bs;
15714e220ebcSLois Curfman McInnes   ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
15720e4b21beSBarry Smith   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1573de87f314SBarry Smith   isend[3] = info->memory;  isend[4] = info->mallocs;
15744e220ebcSLois Curfman McInnes   ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
15750e4b21beSBarry Smith   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1576de87f314SBarry Smith   isend[3] += info->memory;  isend[4] += info->mallocs;
15770ac07820SSatish Balay   if (flag == MAT_LOCAL) {
15784e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
15794e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
15804e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
15814e220ebcSLois Curfman McInnes     info->memory       = isend[3];
15824e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
15830ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1584f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
15854e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15864e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15874e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15884e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15894e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
15900ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1591f7cdd7c9SBarry Smith     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
15924e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
15934e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
15944e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
15954e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
15964e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
1597d41123aaSBarry Smith   } else {
1598d41123aaSBarry Smith     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
15990ac07820SSatish Balay   }
16005a5d4f66SBarry Smith   info->rows_global       = (double)a->M;
16015a5d4f66SBarry Smith   info->columns_global    = (double)a->N;
16025a5d4f66SBarry Smith   info->rows_local        = (double)a->m;
16035a5d4f66SBarry Smith   info->columns_local     = (double)a->N;
16044e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
16054e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
16064e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
16073a40ed3dSBarry Smith   PetscFunctionReturn(0);
16080ac07820SSatish Balay }
16090ac07820SSatish Balay 
16105615d1e5SSatish Balay #undef __FUNC__
16115615d1e5SSatish Balay #define __FUNC__ "MatSetOption_MPIBAIJ"
1612ec1ea8d8SLois Curfman McInnes int MatSetOption_MPIBAIJ(Mat A,MatOption op)
161358667388SSatish Balay {
161458667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *) A->data;
161598305bb5SBarry Smith   int         ierr;
161658667388SSatish Balay 
1617d64ed03dSBarry Smith   PetscFunctionBegin;
161858667388SSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
161958667388SSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
16206da5968aSLois Curfman McInnes       op == MAT_COLUMNS_UNSORTED ||
1621c2653b3dSLois Curfman McInnes       op == MAT_COLUMNS_SORTED ||
16224787f768SSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
16234787f768SSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
162498305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
162598305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1626b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROW_ORIENTED) {
1627aeafbbfcSLois Curfman McInnes         a->roworiented = 1;
162898305bb5SBarry Smith         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
162998305bb5SBarry Smith         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1630b1fbbac0SLois Curfman McInnes   } else if (op == MAT_ROWS_SORTED ||
16316da5968aSLois Curfman McInnes              op == MAT_ROWS_UNSORTED ||
163258667388SSatish Balay              op == MAT_SYMMETRIC ||
163358667388SSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1634b51ba29fSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
163598305bb5SBarry Smith              op == MAT_USE_HASH_TABLE) {
163658667388SSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
163798305bb5SBarry Smith   } else if (op == MAT_COLUMN_ORIENTED) {
163858667388SSatish Balay     a->roworiented = 0;
163998305bb5SBarry Smith     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
164098305bb5SBarry Smith     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
16412b362799SSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
164290f02eecSBarry Smith     a->donotstash = 1;
1643d64ed03dSBarry Smith   } else if (op == MAT_NO_NEW_DIAGONALS) {
1644d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1645133cdb44SSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1646133cdb44SSatish Balay     a->ht_flag = 1;
1647d64ed03dSBarry Smith   } else {
1648d64ed03dSBarry Smith     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1649d64ed03dSBarry Smith   }
16503a40ed3dSBarry Smith   PetscFunctionReturn(0);
165158667388SSatish Balay }
165258667388SSatish Balay 
16535615d1e5SSatish Balay #undef __FUNC__
16545615d1e5SSatish Balay #define __FUNC__ "MatTranspose_MPIBAIJ("
1655ec1ea8d8SLois Curfman McInnes int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
16560ac07820SSatish Balay {
16570ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
16580ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
16590ac07820SSatish Balay   Mat        B;
166040011551SBarry Smith   int        ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
16610ac07820SSatish Balay   int        bs=baij->bs,mbs=baij->mbs;
16620ac07820SSatish Balay   Scalar     *a;
16630ac07820SSatish Balay 
1664d64ed03dSBarry Smith   PetscFunctionBegin;
1665a8c6a408SBarry Smith   if (matout == PETSC_NULL && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
16660ac07820SSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,PETSC_DECIDE,PETSC_DECIDE,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
16670ac07820SSatish Balay   CHKERRQ(ierr);
16680ac07820SSatish Balay 
16690ac07820SSatish Balay   /* copy over the A part */
16700ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->A->data;
16710ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16720ac07820SSatish Balay   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
16730ac07820SSatish Balay 
16740ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
16750ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16760ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
16770ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
16780ac07820SSatish Balay       col = (baij->cstart+aj[j])*bs;
16790ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
16800ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16810ac07820SSatish Balay         col++; a += bs;
16820ac07820SSatish Balay       }
16830ac07820SSatish Balay     }
16840ac07820SSatish Balay   }
16850ac07820SSatish Balay   /* copy over the B part */
16860ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ*) baij->B->data;
16870ac07820SSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
16880ac07820SSatish Balay   for ( i=0; i<mbs; i++ ) {
16890ac07820SSatish Balay     rvals[0] = bs*(baij->rstart + i);
16900ac07820SSatish Balay     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
16910ac07820SSatish Balay     for ( j=ai[i]; j<ai[i+1]; j++ ) {
16920ac07820SSatish Balay       col = baij->garray[aj[j]]*bs;
16930ac07820SSatish Balay       for (k=0; k<bs; k++ ) {
16940ac07820SSatish Balay         ierr = MatSetValues(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
16950ac07820SSatish Balay         col++; a += bs;
16960ac07820SSatish Balay       }
16970ac07820SSatish Balay     }
16980ac07820SSatish Balay   }
16990ac07820SSatish Balay   PetscFree(rvals);
17000ac07820SSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
17010ac07820SSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
17020ac07820SSatish Balay 
17030ac07820SSatish Balay   if (matout != PETSC_NULL) {
17040ac07820SSatish Balay     *matout = B;
17050ac07820SSatish Balay   } else {
1706f830108cSBarry Smith     PetscOps *Abops;
1707cc2dc46cSBarry Smith     MatOps   Aops;
1708f830108cSBarry Smith 
17090ac07820SSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
17100ac07820SSatish Balay     PetscFree(baij->rowners);
17110ac07820SSatish Balay     ierr = MatDestroy(baij->A); CHKERRQ(ierr);
17120ac07820SSatish Balay     ierr = MatDestroy(baij->B); CHKERRQ(ierr);
1713b1fc9764SSatish Balay #if defined (USE_CTABLE)
1714b1fc9764SSatish Balay     if (baij->colmap) TableDelete(baij->colmap);
1715b1fc9764SSatish Balay #else
17160ac07820SSatish Balay     if (baij->colmap) PetscFree(baij->colmap);
1717b1fc9764SSatish Balay #endif
17180ac07820SSatish Balay     if (baij->garray) PetscFree(baij->garray);
17190ac07820SSatish Balay     if (baij->lvec) VecDestroy(baij->lvec);
17200ac07820SSatish Balay     if (baij->Mvctx) VecScatterDestroy(baij->Mvctx);
17210ac07820SSatish Balay     PetscFree(baij);
1722f830108cSBarry Smith 
1723f830108cSBarry Smith     /*
1724f830108cSBarry Smith        This is horrible, horrible code. We need to keep the
1725f830108cSBarry Smith       A pointers for the bops and ops but copy everything
1726f830108cSBarry Smith       else from C.
1727f830108cSBarry Smith     */
1728f830108cSBarry Smith     Abops = A->bops;
1729f830108cSBarry Smith     Aops  = A->ops;
1730f09e8eb9SSatish Balay     PetscMemcpy(A,B,sizeof(struct _p_Mat));
1731f830108cSBarry Smith     A->bops = Abops;
1732f830108cSBarry Smith     A->ops  = Aops;
1733f830108cSBarry Smith 
17340ac07820SSatish Balay     PetscHeaderDestroy(B);
17350ac07820SSatish Balay   }
17363a40ed3dSBarry Smith   PetscFunctionReturn(0);
17370ac07820SSatish Balay }
17380e95ebc0SSatish Balay 
17395615d1e5SSatish Balay #undef __FUNC__
17405615d1e5SSatish Balay #define __FUNC__ "MatDiagonalScale_MPIBAIJ"
17410e95ebc0SSatish Balay int MatDiagonalScale_MPIBAIJ(Mat A,Vec ll,Vec rr)
17420e95ebc0SSatish Balay {
17430e95ebc0SSatish Balay   Mat a = ((Mat_MPIBAIJ *) A->data)->A;
17440e95ebc0SSatish Balay   Mat b = ((Mat_MPIBAIJ *) A->data)->B;
17450e95ebc0SSatish Balay   int ierr,s1,s2,s3;
17460e95ebc0SSatish Balay 
1747d64ed03dSBarry Smith   PetscFunctionBegin;
17480e95ebc0SSatish Balay   if (ll)  {
17490e95ebc0SSatish Balay     ierr = VecGetLocalSize(ll,&s1); CHKERRQ(ierr);
17500e95ebc0SSatish Balay     ierr = MatGetLocalSize(A,&s2,&s3); CHKERRQ(ierr);
1751a8c6a408SBarry Smith     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"non-conforming local sizes");
17520e95ebc0SSatish Balay     ierr = MatDiagonalScale(a,ll,0); CHKERRQ(ierr);
17530e95ebc0SSatish Balay     ierr = MatDiagonalScale(b,ll,0); CHKERRQ(ierr);
17540e95ebc0SSatish Balay   }
1755a8c6a408SBarry Smith   if (rr) SETERRQ(PETSC_ERR_SUP,0,"not supported for right vector");
17563a40ed3dSBarry Smith   PetscFunctionReturn(0);
17570e95ebc0SSatish Balay }
17580e95ebc0SSatish Balay 
17595615d1e5SSatish Balay #undef __FUNC__
17605615d1e5SSatish Balay #define __FUNC__ "MatZeroRows_MPIBAIJ"
1761ec1ea8d8SLois Curfman McInnes int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
17620ac07820SSatish Balay {
17630ac07820SSatish Balay   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ *) A->data;
17640ac07820SSatish Balay   int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
1765a07cd24cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
17660ac07820SSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
17670ac07820SSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1768a07cd24cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
17690ac07820SSatish Balay   MPI_Comm       comm = A->comm;
17700ac07820SSatish Balay   MPI_Request    *send_waits,*recv_waits;
17710ac07820SSatish Balay   MPI_Status     recv_status,*send_status;
17720ac07820SSatish Balay   IS             istmp;
17730ac07820SSatish Balay 
1774d64ed03dSBarry Smith   PetscFunctionBegin;
17750ac07820SSatish Balay   ierr = ISGetSize(is,&N); CHKERRQ(ierr);
17760ac07820SSatish Balay   ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);
17770ac07820SSatish Balay 
17780ac07820SSatish Balay   /*  first count number of contributors to each processor */
17790ac07820SSatish Balay   nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
17800ac07820SSatish Balay   PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
17810ac07820SSatish Balay   owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
17820ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
17830ac07820SSatish Balay     idx = rows[i];
17840ac07820SSatish Balay     found = 0;
17850ac07820SSatish Balay     for ( j=0; j<size; j++ ) {
17860ac07820SSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
17870ac07820SSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
17880ac07820SSatish Balay       }
17890ac07820SSatish Balay     }
1790a8c6a408SBarry Smith     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
17910ac07820SSatish Balay   }
17920ac07820SSatish Balay   nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}
17930ac07820SSatish Balay 
17940ac07820SSatish Balay   /* inform other processors of number of messages and max length*/
17950ac07820SSatish Balay   work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
1796ca161407SBarry Smith   ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
17970ac07820SSatish Balay   nrecvs = work[rank];
1798ca161407SBarry Smith   ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
17990ac07820SSatish Balay   nmax   = work[rank];
18000ac07820SSatish Balay   PetscFree(work);
18010ac07820SSatish Balay 
18020ac07820SSatish Balay   /* post receives:   */
1803d64ed03dSBarry Smith   rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int)); CHKPTRQ(rvalues);
1804d64ed03dSBarry Smith   recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
18050ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
1806ca161407SBarry Smith     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
18070ac07820SSatish Balay   }
18080ac07820SSatish Balay 
18090ac07820SSatish Balay   /* do sends:
18100ac07820SSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
18110ac07820SSatish Balay      the ith processor
18120ac07820SSatish Balay   */
18130ac07820SSatish Balay   svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
1814ca161407SBarry Smith   send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
18150ac07820SSatish Balay   starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
18160ac07820SSatish Balay   starts[0] = 0;
18170ac07820SSatish Balay   for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
18180ac07820SSatish Balay   for ( i=0; i<N; i++ ) {
18190ac07820SSatish Balay     svalues[starts[owner[i]]++] = rows[i];
18200ac07820SSatish Balay   }
18210ac07820SSatish Balay   ISRestoreIndices(is,&rows);
18220ac07820SSatish Balay 
18230ac07820SSatish Balay   starts[0] = 0;
18240ac07820SSatish Balay   for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
18250ac07820SSatish Balay   count = 0;
18260ac07820SSatish Balay   for ( i=0; i<size; i++ ) {
18270ac07820SSatish Balay     if (procs[i]) {
1828ca161407SBarry Smith       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
18290ac07820SSatish Balay     }
18300ac07820SSatish Balay   }
18310ac07820SSatish Balay   PetscFree(starts);
18320ac07820SSatish Balay 
18330ac07820SSatish Balay   base = owners[rank]*bs;
18340ac07820SSatish Balay 
18350ac07820SSatish Balay   /*  wait on receives */
18360ac07820SSatish Balay   lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
18370ac07820SSatish Balay   source = lens + nrecvs;
18380ac07820SSatish Balay   count  = nrecvs; slen = 0;
18390ac07820SSatish Balay   while (count) {
1840ca161407SBarry Smith     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
18410ac07820SSatish Balay     /* unpack receives into our local space */
1842ca161407SBarry Smith     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
18430ac07820SSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
18440ac07820SSatish Balay     lens[imdex]  = n;
18450ac07820SSatish Balay     slen += n;
18460ac07820SSatish Balay     count--;
18470ac07820SSatish Balay   }
18480ac07820SSatish Balay   PetscFree(recv_waits);
18490ac07820SSatish Balay 
18500ac07820SSatish Balay   /* move the data into the send scatter */
18510ac07820SSatish Balay   lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
18520ac07820SSatish Balay   count = 0;
18530ac07820SSatish Balay   for ( i=0; i<nrecvs; i++ ) {
18540ac07820SSatish Balay     values = rvalues + i*nmax;
18550ac07820SSatish Balay     for ( j=0; j<lens[i]; j++ ) {
18560ac07820SSatish Balay       lrows[count++] = values[j] - base;
18570ac07820SSatish Balay     }
18580ac07820SSatish Balay   }
18590ac07820SSatish Balay   PetscFree(rvalues); PetscFree(lens);
18600ac07820SSatish Balay   PetscFree(owner); PetscFree(nprocs);
18610ac07820SSatish Balay 
18620ac07820SSatish Balay   /* actually zap the local rows */
1863029af93fSBarry Smith   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
18640ac07820SSatish Balay   PLogObjectParent(A,istmp);
1865a07cd24cSSatish Balay 
186672dacd9aSBarry Smith   /*
186772dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
186872dacd9aSBarry Smith      is square and the user wishes to set the diagonal we use seperate
186972dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
187072dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
187172dacd9aSBarry Smith 
187272dacd9aSBarry Smith        Contributed by: Mathew Knepley
187372dacd9aSBarry Smith   */
18749c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
18750ac07820SSatish Balay   ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
18769c957beeSSatish Balay   if (diag && (l->A->M == l->A->N)) {
18779c957beeSSatish Balay     ierr      = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
18789c957beeSSatish Balay   } else if (diag) {
18799c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1880fa46199cSSatish Balay     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1881fa46199cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1882fa46199cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
18836525c446SSatish Balay     }
1884a07cd24cSSatish Balay     for ( i = 0; i < slen; i++ ) {
1885a07cd24cSSatish Balay       row  = lrows[i] + rstart_bs;
1886a07cd24cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1887a07cd24cSSatish Balay     }
1888a07cd24cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1889a07cd24cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
18909c957beeSSatish Balay   } else {
18919c957beeSSatish Balay     ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
1892a07cd24cSSatish Balay   }
18939c957beeSSatish Balay 
18949c957beeSSatish Balay   ierr = ISDestroy(istmp); CHKERRQ(ierr);
1895a07cd24cSSatish Balay   PetscFree(lrows);
1896a07cd24cSSatish Balay 
18970ac07820SSatish Balay   /* wait on sends */
18980ac07820SSatish Balay   if (nsends) {
1899d64ed03dSBarry Smith     send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1900ca161407SBarry Smith     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
19010ac07820SSatish Balay     PetscFree(send_status);
19020ac07820SSatish Balay   }
19030ac07820SSatish Balay   PetscFree(send_waits); PetscFree(svalues);
19040ac07820SSatish Balay 
19053a40ed3dSBarry Smith   PetscFunctionReturn(0);
19060ac07820SSatish Balay }
190772dacd9aSBarry Smith 
19085615d1e5SSatish Balay #undef __FUNC__
19095615d1e5SSatish Balay #define __FUNC__ "MatPrintHelp_MPIBAIJ"
1910ec1ea8d8SLois Curfman McInnes int MatPrintHelp_MPIBAIJ(Mat A)
1911ba4ca20aSSatish Balay {
1912ba4ca20aSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
191325fdafccSSatish Balay   MPI_Comm    comm = A->comm;
1914133cdb44SSatish Balay   static int  called = 0;
19153a40ed3dSBarry Smith   int         ierr;
1916ba4ca20aSSatish Balay 
1917d64ed03dSBarry Smith   PetscFunctionBegin;
19183a40ed3dSBarry Smith   if (!a->rank) {
19193a40ed3dSBarry Smith     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
192025fdafccSSatish Balay   }
192125fdafccSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1922133cdb44SSatish Balay   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1923133cdb44SSatish Balay   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
19243a40ed3dSBarry Smith   PetscFunctionReturn(0);
1925ba4ca20aSSatish Balay }
19260ac07820SSatish Balay 
19275615d1e5SSatish Balay #undef __FUNC__
19285615d1e5SSatish Balay #define __FUNC__ "MatSetUnfactored_MPIBAIJ"
1929ec1ea8d8SLois Curfman McInnes int MatSetUnfactored_MPIBAIJ(Mat A)
1930bb5a7306SBarry Smith {
1931bb5a7306SBarry Smith   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*) A->data;
1932bb5a7306SBarry Smith   int         ierr;
1933d64ed03dSBarry Smith 
1934d64ed03dSBarry Smith   PetscFunctionBegin;
1935bb5a7306SBarry Smith   ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
19363a40ed3dSBarry Smith   PetscFunctionReturn(0);
1937bb5a7306SBarry Smith }
1938bb5a7306SBarry Smith 
19392e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
19400ac07820SSatish Balay 
194179bdfe76SSatish Balay /* -------------------------------------------------------------------*/
1942cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {
1943cc2dc46cSBarry Smith   MatSetValues_MPIBAIJ,
1944cc2dc46cSBarry Smith   MatGetRow_MPIBAIJ,
1945cc2dc46cSBarry Smith   MatRestoreRow_MPIBAIJ,
1946cc2dc46cSBarry Smith   MatMult_MPIBAIJ,
1947cc2dc46cSBarry Smith   MatMultAdd_MPIBAIJ,
1948cc2dc46cSBarry Smith   MatMultTrans_MPIBAIJ,
1949cc2dc46cSBarry Smith   MatMultTransAdd_MPIBAIJ,
1950cc2dc46cSBarry Smith   0,
1951cc2dc46cSBarry Smith   0,
1952cc2dc46cSBarry Smith   0,
1953cc2dc46cSBarry Smith   0,
1954cc2dc46cSBarry Smith   0,
1955cc2dc46cSBarry Smith   0,
1956cc2dc46cSBarry Smith   0,
1957cc2dc46cSBarry Smith   MatTranspose_MPIBAIJ,
1958cc2dc46cSBarry Smith   MatGetInfo_MPIBAIJ,
1959cc2dc46cSBarry Smith   0,
1960cc2dc46cSBarry Smith   MatGetDiagonal_MPIBAIJ,
1961cc2dc46cSBarry Smith   MatDiagonalScale_MPIBAIJ,
1962cc2dc46cSBarry Smith   MatNorm_MPIBAIJ,
1963cc2dc46cSBarry Smith   MatAssemblyBegin_MPIBAIJ,
1964cc2dc46cSBarry Smith   MatAssemblyEnd_MPIBAIJ,
1965cc2dc46cSBarry Smith   0,
1966cc2dc46cSBarry Smith   MatSetOption_MPIBAIJ,
1967cc2dc46cSBarry Smith   MatZeroEntries_MPIBAIJ,
1968cc2dc46cSBarry Smith   MatZeroRows_MPIBAIJ,
1969cc2dc46cSBarry Smith   0,
1970cc2dc46cSBarry Smith   0,
1971cc2dc46cSBarry Smith   0,
1972cc2dc46cSBarry Smith   0,
1973cc2dc46cSBarry Smith   MatGetSize_MPIBAIJ,
1974cc2dc46cSBarry Smith   MatGetLocalSize_MPIBAIJ,
1975cc2dc46cSBarry Smith   MatGetOwnershipRange_MPIBAIJ,
1976cc2dc46cSBarry Smith   0,
1977cc2dc46cSBarry Smith   0,
1978cc2dc46cSBarry Smith   0,
1979cc2dc46cSBarry Smith   0,
19802e8a6d31SBarry Smith   MatDuplicate_MPIBAIJ,
1981cc2dc46cSBarry Smith   0,
1982cc2dc46cSBarry Smith   0,
1983cc2dc46cSBarry Smith   0,
1984cc2dc46cSBarry Smith   0,
1985cc2dc46cSBarry Smith   0,
1986cc2dc46cSBarry Smith   MatGetSubMatrices_MPIBAIJ,
1987cc2dc46cSBarry Smith   MatIncreaseOverlap_MPIBAIJ,
1988cc2dc46cSBarry Smith   MatGetValues_MPIBAIJ,
1989cc2dc46cSBarry Smith   0,
1990cc2dc46cSBarry Smith   MatPrintHelp_MPIBAIJ,
1991cc2dc46cSBarry Smith   MatScale_MPIBAIJ,
1992cc2dc46cSBarry Smith   0,
1993cc2dc46cSBarry Smith   0,
1994cc2dc46cSBarry Smith   0,
1995cc2dc46cSBarry Smith   MatGetBlockSize_MPIBAIJ,
1996cc2dc46cSBarry Smith   0,
1997cc2dc46cSBarry Smith   0,
1998cc2dc46cSBarry Smith   0,
1999cc2dc46cSBarry Smith   0,
2000cc2dc46cSBarry Smith   0,
2001cc2dc46cSBarry Smith   0,
2002cc2dc46cSBarry Smith   MatSetUnfactored_MPIBAIJ,
2003cc2dc46cSBarry Smith   0,
2004cc2dc46cSBarry Smith   MatSetValuesBlocked_MPIBAIJ,
2005cc2dc46cSBarry Smith   0,
2006cc2dc46cSBarry Smith   0,
2007cc2dc46cSBarry Smith   0,
2008cc2dc46cSBarry Smith   MatGetMaps_Petsc};
200979bdfe76SSatish Balay 
20105ef9f2a5SBarry Smith 
2011e18c124aSSatish Balay EXTERN_C_BEGIN
20125ef9f2a5SBarry Smith #undef __FUNC__
20135ef9f2a5SBarry Smith #define __FUNC__ "MatGetDiagonalBlock_MPIBAIJ"
20145ef9f2a5SBarry Smith int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
20155ef9f2a5SBarry Smith {
20165ef9f2a5SBarry Smith   PetscFunctionBegin;
20175ef9f2a5SBarry Smith   *a      = ((Mat_MPIBAIJ *)A->data)->A;
20185ef9f2a5SBarry Smith   *iscopy = PETSC_FALSE;
20195ef9f2a5SBarry Smith   PetscFunctionReturn(0);
20205ef9f2a5SBarry Smith }
2021e18c124aSSatish Balay EXTERN_C_END
202279bdfe76SSatish Balay 
20235615d1e5SSatish Balay #undef __FUNC__
20245615d1e5SSatish Balay #define __FUNC__ "MatCreateMPIBAIJ"
202579bdfe76SSatish Balay /*@C
202679bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
202779bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
202879bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
202979bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
203079bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
203179bdfe76SSatish Balay 
2032db81eaa0SLois Curfman McInnes    Collective on MPI_Comm
2033db81eaa0SLois Curfman McInnes 
203479bdfe76SSatish Balay    Input Parameters:
2035db81eaa0SLois Curfman McInnes +  comm - MPI communicator
203679bdfe76SSatish Balay .  bs   - size of blockk
203779bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
203892e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
203992e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
204092e8d321SLois Curfman McInnes .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
204192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
204292e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
2043be79a94dSBarry Smith .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2044be79a94dSBarry Smith .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
204579bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
204679bdfe76SSatish Balay            submatrix  (same for all local rows)
204792e8d321SLois Curfman McInnes .  d_nzz - array containing the number of block nonzeros in the various block rows
204892e8d321SLois Curfman McInnes            of the in diagonal portion of the local (possibly different for each block
2049db81eaa0SLois Curfman McInnes            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
205092e8d321SLois Curfman McInnes .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
205179bdfe76SSatish Balay            submatrix (same for all local rows).
2052db81eaa0SLois Curfman McInnes -  o_nzz - array containing the number of nonzeros in the various block rows of the
205392e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
205492e8d321SLois Curfman McInnes            each block row) or PETSC_NULL.
205579bdfe76SSatish Balay 
205679bdfe76SSatish Balay    Output Parameter:
205779bdfe76SSatish Balay .  A - the matrix
205879bdfe76SSatish Balay 
2059db81eaa0SLois Curfman McInnes    Options Database Keys:
2060db81eaa0SLois Curfman McInnes .   -mat_no_unroll - uses code that does not unroll the loops in the
2061db81eaa0SLois Curfman McInnes                      block calculations (much slower)
2062db81eaa0SLois Curfman McInnes .   -mat_block_size - size of the blocks to use
2063494eafd4SBarry Smith .   -mat_mpi - use the parallel matrix data structures even on one processor
2064494eafd4SBarry Smith                (defaults to using SeqBAIJ format on one processor)
20653ffaccefSLois Curfman McInnes 
2066b259b22eSLois Curfman McInnes    Notes:
206779bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
206879bdfe76SSatish Balay    (possibly both).
206979bdfe76SSatish Balay 
2070be79a94dSBarry Smith    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2071be79a94dSBarry Smith    than it must be used on all processors that share the object for that argument.
2072be79a94dSBarry Smith 
207379bdfe76SSatish Balay    Storage Information:
207479bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
207579bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
207679bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
207779bdfe76SSatish Balay    local matrix (a rectangular submatrix).
207879bdfe76SSatish Balay 
207979bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
208079bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
208179bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
208279bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
208379bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
208479bdfe76SSatish Balay 
208579bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
208679bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
208779bdfe76SSatish Balay 
2088db81eaa0SLois Curfman McInnes .vb
2089db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
2090db81eaa0SLois Curfman McInnes           -------------------
2091db81eaa0SLois Curfman McInnes    row 3  |  o o o d d d o o o o o o
2092db81eaa0SLois Curfman McInnes    row 4  |  o o o d d d o o o o o o
2093db81eaa0SLois Curfman McInnes    row 5  |  o o o d d d o o o o o o
2094db81eaa0SLois Curfman McInnes           -------------------
2095db81eaa0SLois Curfman McInnes .ve
209679bdfe76SSatish Balay 
209779bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
209879bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
209979bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
210057b952d6SSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
210179bdfe76SSatish Balay 
2102d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2103d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
210479bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
210592e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
210692e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
21076da5968aSLois Curfman McInnes    matrices.
210879bdfe76SSatish Balay 
2109027ccd11SLois Curfman McInnes    Level: intermediate
2110027ccd11SLois Curfman McInnes 
211192e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel
211279bdfe76SSatish Balay 
2113db81eaa0SLois Curfman McInnes .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIAIJ()
211479bdfe76SSatish Balay @*/
211579bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
211679bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
211779bdfe76SSatish Balay {
211879bdfe76SSatish Balay   Mat          B;
211979bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
2120133cdb44SSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size,flg;
2121a2ab621fSBarry Smith   int          flag1 = 0,flag2 = 0;
212279bdfe76SSatish Balay 
2123d64ed03dSBarry Smith   PetscFunctionBegin;
2124a8c6a408SBarry Smith   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
21253914022bSBarry Smith 
21263914022bSBarry Smith   MPI_Comm_size(comm,&size);
2127494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1); CHKERRQ(ierr);
2128494eafd4SBarry Smith   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr);
2129494eafd4SBarry Smith   if (!flag1 && !flag2 && size == 1) {
21303914022bSBarry Smith     if (M == PETSC_DECIDE) M = m;
21313914022bSBarry Smith     if (N == PETSC_DECIDE) N = n;
21323914022bSBarry Smith     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
21333a40ed3dSBarry Smith     PetscFunctionReturn(0);
21343914022bSBarry Smith   }
21353914022bSBarry Smith 
213679bdfe76SSatish Balay   *A = 0;
21373f1db9ecSBarry Smith   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
213879bdfe76SSatish Balay   PLogObjectCreate(B);
213979bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
214079bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
2141cc2dc46cSBarry Smith   PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
21424c50302cSBarry Smith 
2143e1311b90SBarry Smith   B->ops->destroy    = MatDestroy_MPIBAIJ;
2144e1311b90SBarry Smith   B->ops->view       = MatView_MPIBAIJ;
214590f02eecSBarry Smith   B->mapping    = 0;
214679bdfe76SSatish Balay   B->factor     = 0;
214779bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
214879bdfe76SSatish Balay 
2149e0fa3b82SLois Curfman McInnes   B->insertmode = NOT_SET_VALUES;
215079bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
215179bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
215279bdfe76SSatish Balay 
2153d64ed03dSBarry Smith   if ( m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
2154a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2155d64ed03dSBarry Smith   }
2156a8c6a408SBarry Smith   if ( M == PETSC_DECIDE && m == PETSC_DECIDE) {
2157a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2158a8c6a408SBarry Smith   }
2159a8c6a408SBarry Smith   if ( N == PETSC_DECIDE && n == PETSC_DECIDE) {
2160a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2161a8c6a408SBarry Smith   }
2162cee3aa6bSSatish Balay   if ( M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2163cee3aa6bSSatish Balay   if ( N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
216479bdfe76SSatish Balay 
216579bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
216679bdfe76SSatish Balay     work[0] = m; work[1] = n;
216779bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
2168ca161407SBarry Smith     ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
216979bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
217079bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
217179bdfe76SSatish Balay   }
217279bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
217379bdfe76SSatish Balay     Mbs = M/bs;
2174a8c6a408SBarry Smith     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
217579bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
217679bdfe76SSatish Balay     m   = mbs*bs;
217779bdfe76SSatish Balay   }
217879bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
217979bdfe76SSatish Balay     Nbs = N/bs;
2180a8c6a408SBarry Smith     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
218179bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
218279bdfe76SSatish Balay     n   = nbs*bs;
218379bdfe76SSatish Balay   }
2184a8c6a408SBarry Smith   if (mbs*bs != m || nbs*bs != n) {
2185a8c6a408SBarry Smith     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2186a8c6a408SBarry Smith   }
218779bdfe76SSatish Balay 
218879bdfe76SSatish Balay   b->m = m; B->m = m;
218979bdfe76SSatish Balay   b->n = n; B->n = n;
219079bdfe76SSatish Balay   b->N = N; B->N = N;
219179bdfe76SSatish Balay   b->M = M; B->M = M;
219279bdfe76SSatish Balay   b->bs  = bs;
219379bdfe76SSatish Balay   b->bs2 = bs*bs;
219479bdfe76SSatish Balay   b->mbs = mbs;
219579bdfe76SSatish Balay   b->nbs = nbs;
219679bdfe76SSatish Balay   b->Mbs = Mbs;
219779bdfe76SSatish Balay   b->Nbs = Nbs;
219879bdfe76SSatish Balay 
2199c7fcc2eaSBarry Smith   /* the information in the maps duplicates the information computed below, eventually
2200c7fcc2eaSBarry Smith      we should remove the duplicate information that is not contained in the maps */
2201488ecbafSBarry Smith   ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
2202488ecbafSBarry Smith   ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);
2203c7fcc2eaSBarry Smith 
220479bdfe76SSatish Balay   /* build local table of row and column ownerships */
220579bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
2206f09e8eb9SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
22070ac07820SSatish Balay   b->cowners = b->rowners + b->size + 2;
2208ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
220979bdfe76SSatish Balay   b->rowners[0] = 0;
221079bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
221179bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
221279bdfe76SSatish Balay   }
221379bdfe76SSatish Balay   b->rstart    = b->rowners[b->rank];
221479bdfe76SSatish Balay   b->rend      = b->rowners[b->rank+1];
22154fa0d573SSatish Balay   b->rstart_bs = b->rstart * bs;
22164fa0d573SSatish Balay   b->rend_bs   = b->rend * bs;
22174fa0d573SSatish Balay 
2218ca161407SBarry Smith   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
221979bdfe76SSatish Balay   b->cowners[0] = 0;
222079bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
222179bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
222279bdfe76SSatish Balay   }
222379bdfe76SSatish Balay   b->cstart    = b->cowners[b->rank];
222479bdfe76SSatish Balay   b->cend      = b->cowners[b->rank+1];
22254fa0d573SSatish Balay   b->cstart_bs = b->cstart * bs;
22264fa0d573SSatish Balay   b->cend_bs   = b->cend * bs;
222779bdfe76SSatish Balay 
2228a07cd24cSSatish Balay 
222979bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2230029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
223179bdfe76SSatish Balay   PLogObjectParent(B,b->A);
223279bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2233029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
223479bdfe76SSatish Balay   PLogObjectParent(B,b->B);
223579bdfe76SSatish Balay 
223679bdfe76SSatish Balay   /* build cache for off array entries formed */
2237*bbb85fb3SSatish Balay   ierr = StashCreate_Private(comm,bs,&b->stash); CHKERRQ(ierr);
223890f02eecSBarry Smith   b->donotstash  = 0;
223979bdfe76SSatish Balay   b->colmap      = 0;
224079bdfe76SSatish Balay   b->garray      = 0;
224179bdfe76SSatish Balay   b->roworiented = 1;
224279bdfe76SSatish Balay 
224330793edcSSatish Balay   /* stuff used in block assembly */
224430793edcSSatish Balay   b->barray       = 0;
224530793edcSSatish Balay 
224679bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
224779bdfe76SSatish Balay   b->lvec         = 0;
224879bdfe76SSatish Balay   b->Mvctx        = 0;
224979bdfe76SSatish Balay 
225079bdfe76SSatish Balay   /* stuff for MatGetRow() */
225179bdfe76SSatish Balay   b->rowindices   = 0;
225279bdfe76SSatish Balay   b->rowvalues    = 0;
225379bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
225479bdfe76SSatish Balay 
2255a07cd24cSSatish Balay   /* hash table stuff */
2256a07cd24cSSatish Balay   b->ht           = 0;
2257187ce0cbSSatish Balay   b->hd           = 0;
22580bdbc534SSatish Balay   b->ht_size      = 0;
2259133cdb44SSatish Balay   b->ht_flag      = 0;
226025fdafccSSatish Balay   b->ht_fact      = 0;
2261187ce0cbSSatish Balay   b->ht_total_ct  = 0;
2262187ce0cbSSatish Balay   b->ht_insert_ct = 0;
2263a07cd24cSSatish Balay 
226479bdfe76SSatish Balay   *A = B;
2265133cdb44SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg); CHKERRQ(ierr);
2266133cdb44SSatish Balay   if (flg) {
2267133cdb44SSatish Balay     double fact = 1.39;
2268133cdb44SSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE); CHKERRQ(ierr);
2269133cdb44SSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,&flg); CHKERRQ(ierr);
2270133cdb44SSatish Balay     if (fact <= 1.0) fact = 1.39;
2271133cdb44SSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact); CHKERRQ(ierr);
2272133cdb44SSatish Balay     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2273133cdb44SSatish Balay   }
22745ef9f2a5SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
22755ef9f2a5SBarry Smith                                      "MatGetDiagonalBlock_MPIBAIJ",
22765ef9f2a5SBarry Smith                                      (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
22773a40ed3dSBarry Smith   PetscFunctionReturn(0);
227879bdfe76SSatish Balay }
2279026e39d0SSatish Balay 
22805615d1e5SSatish Balay #undef __FUNC__
22812e8a6d31SBarry Smith #define __FUNC__ "MatDuplicate_MPIBAIJ"
22822e8a6d31SBarry Smith static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
22830ac07820SSatish Balay {
22840ac07820SSatish Balay   Mat         mat;
22850ac07820SSatish Balay   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ *) matin->data;
22860ac07820SSatish Balay   int         ierr, len=0, flg;
22870ac07820SSatish Balay 
2288d64ed03dSBarry Smith   PetscFunctionBegin;
22890ac07820SSatish Balay   *newmat       = 0;
22903f1db9ecSBarry Smith   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
22910ac07820SSatish Balay   PLogObjectCreate(mat);
22920ac07820SSatish Balay   mat->data       = (void *) (a = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(a);
2293cc2dc46cSBarry Smith   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
2294e1311b90SBarry Smith   mat->ops->destroy    = MatDestroy_MPIBAIJ;
2295e1311b90SBarry Smith   mat->ops->view       = MatView_MPIBAIJ;
22960ac07820SSatish Balay   mat->factor          = matin->factor;
22970ac07820SSatish Balay   mat->assembled       = PETSC_TRUE;
2298aef5e8e0SSatish Balay   mat->insertmode      = NOT_SET_VALUES;
22990ac07820SSatish Balay 
23000ac07820SSatish Balay   a->m = mat->m   = oldmat->m;
23010ac07820SSatish Balay   a->n = mat->n   = oldmat->n;
23020ac07820SSatish Balay   a->M = mat->M   = oldmat->M;
23030ac07820SSatish Balay   a->N = mat->N   = oldmat->N;
23040ac07820SSatish Balay 
23050ac07820SSatish Balay   a->bs  = oldmat->bs;
23060ac07820SSatish Balay   a->bs2 = oldmat->bs2;
23070ac07820SSatish Balay   a->mbs = oldmat->mbs;
23080ac07820SSatish Balay   a->nbs = oldmat->nbs;
23090ac07820SSatish Balay   a->Mbs = oldmat->Mbs;
23100ac07820SSatish Balay   a->Nbs = oldmat->Nbs;
23110ac07820SSatish Balay 
23120ac07820SSatish Balay   a->rstart       = oldmat->rstart;
23130ac07820SSatish Balay   a->rend         = oldmat->rend;
23140ac07820SSatish Balay   a->cstart       = oldmat->cstart;
23150ac07820SSatish Balay   a->cend         = oldmat->cend;
23160ac07820SSatish Balay   a->size         = oldmat->size;
23170ac07820SSatish Balay   a->rank         = oldmat->rank;
2318aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
2319aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
2320aef5e8e0SSatish Balay   a->rowindices   = 0;
23210ac07820SSatish Balay   a->rowvalues    = 0;
23220ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
232330793edcSSatish Balay   a->barray       = 0;
23243123a43fSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
23253123a43fSSatish Balay   a->rend_bs      = oldmat->rend_bs;
23263123a43fSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
23273123a43fSSatish Balay   a->cend_bs      = oldmat->cend_bs;
23280ac07820SSatish Balay 
2329133cdb44SSatish Balay   /* hash table stuff */
2330133cdb44SSatish Balay   a->ht           = 0;
2331133cdb44SSatish Balay   a->hd           = 0;
2332133cdb44SSatish Balay   a->ht_size      = 0;
2333133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
233425fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2335133cdb44SSatish Balay   a->ht_total_ct  = 0;
2336133cdb44SSatish Balay   a->ht_insert_ct = 0;
2337133cdb44SSatish Balay 
2338133cdb44SSatish Balay 
23390ac07820SSatish Balay   a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
2340f09e8eb9SSatish Balay   PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
23410ac07820SSatish Balay   a->cowners = a->rowners + a->size + 2;
23420ac07820SSatish Balay   PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
2343*bbb85fb3SSatish Balay   ierr = StashCreate_Private(matin->comm,oldmat->bs,&a->stash); CHKERRQ(ierr);
23440ac07820SSatish Balay   if (oldmat->colmap) {
234548e59246SSatish Balay #if defined (USE_CTABLE)
2346fa46199cSSatish Balay   ierr = TableCreateCopy(oldmat->colmap,&a->colmap); CHKERRQ(ierr);
234748e59246SSatish Balay #else
23480ac07820SSatish Balay     a->colmap = (int *) PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
23490ac07820SSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
23500ac07820SSatish Balay     PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
235148e59246SSatish Balay #endif
23520ac07820SSatish Balay   } else a->colmap = 0;
23530ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *) (oldmat->B->data))->nbs)) {
23540ac07820SSatish Balay     a->garray = (int *) PetscMalloc(len*sizeof(int)); CHKPTRQ(a->garray);
23550ac07820SSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
23560ac07820SSatish Balay     PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
23570ac07820SSatish Balay   } else a->garray = 0;
23580ac07820SSatish Balay 
23590ac07820SSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
23600ac07820SSatish Balay   PLogObjectParent(mat,a->lvec);
23610ac07820SSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
2362e18c124aSSatish Balay 
23630ac07820SSatish Balay   PLogObjectParent(mat,a->Mvctx);
23642e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr);
23650ac07820SSatish Balay   PLogObjectParent(mat,a->A);
23662e8a6d31SBarry Smith   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr);
23670ac07820SSatish Balay   PLogObjectParent(mat,a->B);
23680ac07820SSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
2369e18c124aSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
23700ac07820SSatish Balay   if (flg) {
23710ac07820SSatish Balay     ierr = MatPrintHelp(mat); CHKERRQ(ierr);
23720ac07820SSatish Balay   }
23730ac07820SSatish Balay   *newmat = mat;
23743a40ed3dSBarry Smith   PetscFunctionReturn(0);
23750ac07820SSatish Balay }
237657b952d6SSatish Balay 
237757b952d6SSatish Balay #include "sys.h"
237857b952d6SSatish Balay 
23795615d1e5SSatish Balay #undef __FUNC__
23805615d1e5SSatish Balay #define __FUNC__ "MatLoad_MPIBAIJ"
238157b952d6SSatish Balay int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
238257b952d6SSatish Balay {
238357b952d6SSatish Balay   Mat          A;
238457b952d6SSatish Balay   int          i, nz, ierr, j,rstart, rend, fd;
238557b952d6SSatish Balay   Scalar       *vals,*buf;
238657b952d6SSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
238757b952d6SSatish Balay   MPI_Status   status;
2388cee3aa6bSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
238957b952d6SSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0, jj,*mycols,*ibuf;
239040011551SBarry Smith   int          flg,tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
239157b952d6SSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
239257b952d6SSatish Balay   int          dcount,kmax,k,nzcount,tmp;
239357b952d6SSatish Balay 
2394d64ed03dSBarry Smith   PetscFunctionBegin;
239557b952d6SSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr);
239657b952d6SSatish Balay 
239757b952d6SSatish Balay   MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
239857b952d6SSatish Balay   if (!rank) {
239957b952d6SSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
2400e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
2401a8c6a408SBarry Smith     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2402d64ed03dSBarry Smith     if (header[3] < 0) {
2403a8c6a408SBarry Smith       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2404d64ed03dSBarry Smith     }
24056c5fab8fSBarry Smith   }
2406d64ed03dSBarry Smith 
2407ca161407SBarry Smith   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
240857b952d6SSatish Balay   M = header[1]; N = header[2];
240957b952d6SSatish Balay 
2410a8c6a408SBarry Smith   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
241157b952d6SSatish Balay 
241257b952d6SSatish Balay   /*
241357b952d6SSatish Balay      This code adds extra rows to make sure the number of rows is
241457b952d6SSatish Balay      divisible by the blocksize
241557b952d6SSatish Balay   */
241657b952d6SSatish Balay   Mbs        = M/bs;
241757b952d6SSatish Balay   extra_rows = bs - M + bs*(Mbs);
241857b952d6SSatish Balay   if (extra_rows == bs) extra_rows = 0;
241957b952d6SSatish Balay   else                  Mbs++;
242057b952d6SSatish Balay   if (extra_rows &&!rank) {
2421b0267e0aSLois Curfman McInnes     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
242257b952d6SSatish Balay   }
2423537820f0SBarry Smith 
242457b952d6SSatish Balay   /* determine ownership of all rows */
242557b952d6SSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
242657b952d6SSatish Balay   m   = mbs * bs;
2427cee3aa6bSSatish Balay   rowners = (int *) PetscMalloc(2*(size+2)*sizeof(int)); CHKPTRQ(rowners);
2428cee3aa6bSSatish Balay   browners = rowners + size + 1;
2429ca161407SBarry Smith   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
243057b952d6SSatish Balay   rowners[0] = 0;
2431cee3aa6bSSatish Balay   for ( i=2; i<=size; i++ ) rowners[i] += rowners[i-1];
2432cee3aa6bSSatish Balay   for ( i=0; i<=size;  i++ ) browners[i] = rowners[i]*bs;
243357b952d6SSatish Balay   rstart = rowners[rank];
243457b952d6SSatish Balay   rend   = rowners[rank+1];
243557b952d6SSatish Balay 
243657b952d6SSatish Balay   /* distribute row lengths to all processors */
243757b952d6SSatish Balay   locrowlens = (int*) PetscMalloc( (rend-rstart)*bs*sizeof(int) ); CHKPTRQ(locrowlens);
243857b952d6SSatish Balay   if (!rank) {
243957b952d6SSatish Balay     rowlengths = (int*) PetscMalloc( (M+extra_rows)*sizeof(int) ); CHKPTRQ(rowlengths);
2440e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
244157b952d6SSatish Balay     for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1;
244257b952d6SSatish Balay     sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
2443cee3aa6bSSatish Balay     for ( i=0; i<size; i++ ) sndcounts[i] = browners[i+1] - browners[i];
2444ca161407SBarry Smith     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
244557b952d6SSatish Balay     PetscFree(sndcounts);
2446d64ed03dSBarry Smith   } else {
2447ca161407SBarry Smith     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT, 0,comm);CHKERRQ(ierr);
244857b952d6SSatish Balay   }
244957b952d6SSatish Balay 
245057b952d6SSatish Balay   if (!rank) {
245157b952d6SSatish Balay     /* calculate the number of nonzeros on each processor */
245257b952d6SSatish Balay     procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
245357b952d6SSatish Balay     PetscMemzero(procsnz,size*sizeof(int));
245457b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
245557b952d6SSatish Balay       for ( j=rowners[i]*bs; j< rowners[i+1]*bs; j++ ) {
245657b952d6SSatish Balay         procsnz[i] += rowlengths[j];
245757b952d6SSatish Balay       }
245857b952d6SSatish Balay     }
245957b952d6SSatish Balay     PetscFree(rowlengths);
246057b952d6SSatish Balay 
246157b952d6SSatish Balay     /* determine max buffer needed and allocate it */
246257b952d6SSatish Balay     maxnz = 0;
246357b952d6SSatish Balay     for ( i=0; i<size; i++ ) {
246457b952d6SSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
246557b952d6SSatish Balay     }
246657b952d6SSatish Balay     cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);
246757b952d6SSatish Balay 
246857b952d6SSatish Balay     /* read in my part of the matrix column indices  */
246957b952d6SSatish Balay     nz = procsnz[0];
247057b952d6SSatish Balay     ibuf = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
247157b952d6SSatish Balay     mycols = ibuf;
2472cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2473e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);
2474cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2475cee3aa6bSSatish Balay 
247657b952d6SSatish Balay     /* read in every ones (except the last) and ship off */
247757b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
247857b952d6SSatish Balay       nz   = procsnz[i];
2479e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
2480ca161407SBarry Smith       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
248157b952d6SSatish Balay     }
248257b952d6SSatish Balay     /* read in the stuff for the last proc */
248357b952d6SSatish Balay     if ( size != 1 ) {
248457b952d6SSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2485e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
248657b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) cols[nz+i] = M+i;
2487ca161407SBarry Smith       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
248857b952d6SSatish Balay     }
248957b952d6SSatish Balay     PetscFree(cols);
2490d64ed03dSBarry Smith   } else {
249157b952d6SSatish Balay     /* determine buffer space needed for message */
249257b952d6SSatish Balay     nz = 0;
249357b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
249457b952d6SSatish Balay       nz += locrowlens[i];
249557b952d6SSatish Balay     }
249657b952d6SSatish Balay     ibuf   = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(ibuf);
249757b952d6SSatish Balay     mycols = ibuf;
249857b952d6SSatish Balay     /* receive message of column indices*/
2499ca161407SBarry Smith     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2500ca161407SBarry Smith     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2501a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
250257b952d6SSatish Balay   }
250357b952d6SSatish Balay 
250457b952d6SSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2505cee3aa6bSSatish Balay   dlens  = (int *) PetscMalloc( 2*(rend-rstart+1)*sizeof(int) ); CHKPTRQ(dlens);
2506cee3aa6bSSatish Balay   odlens = dlens + (rend-rstart);
250757b952d6SSatish Balay   mask   = (int *) PetscMalloc( 3*Mbs*sizeof(int) ); CHKPTRQ(mask);
2508cee3aa6bSSatish Balay   PetscMemzero(mask,3*Mbs*sizeof(int));
250957b952d6SSatish Balay   masked1 = mask    + Mbs;
251057b952d6SSatish Balay   masked2 = masked1 + Mbs;
251157b952d6SSatish Balay   rowcount = 0; nzcount = 0;
251257b952d6SSatish Balay   for ( i=0; i<mbs; i++ ) {
251357b952d6SSatish Balay     dcount  = 0;
251457b952d6SSatish Balay     odcount = 0;
251557b952d6SSatish Balay     for ( j=0; j<bs; j++ ) {
251657b952d6SSatish Balay       kmax = locrowlens[rowcount];
251757b952d6SSatish Balay       for ( k=0; k<kmax; k++ ) {
251857b952d6SSatish Balay         tmp = mycols[nzcount++]/bs;
251957b952d6SSatish Balay         if (!mask[tmp]) {
252057b952d6SSatish Balay           mask[tmp] = 1;
252157b952d6SSatish Balay           if (tmp < rstart || tmp >= rend ) masked2[odcount++] = tmp;
252257b952d6SSatish Balay           else masked1[dcount++] = tmp;
252357b952d6SSatish Balay         }
252457b952d6SSatish Balay       }
252557b952d6SSatish Balay       rowcount++;
252657b952d6SSatish Balay     }
2527cee3aa6bSSatish Balay 
252857b952d6SSatish Balay     dlens[i]  = dcount;
252957b952d6SSatish Balay     odlens[i] = odcount;
2530cee3aa6bSSatish Balay 
253157b952d6SSatish Balay     /* zero out the mask elements we set */
253257b952d6SSatish Balay     for ( j=0; j<dcount; j++ ) mask[masked1[j]] = 0;
253357b952d6SSatish Balay     for ( j=0; j<odcount; j++ ) mask[masked2[j]] = 0;
253457b952d6SSatish Balay   }
2535cee3aa6bSSatish Balay 
253657b952d6SSatish Balay   /* create our matrix */
2537537820f0SBarry Smith   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2538537820f0SBarry Smith          CHKERRQ(ierr);
253957b952d6SSatish Balay   A = *newmat;
25406d4a8577SBarry Smith   MatSetOption(A,MAT_COLUMNS_SORTED);
254157b952d6SSatish Balay 
254257b952d6SSatish Balay   if (!rank) {
254357b952d6SSatish Balay     buf = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(buf);
254457b952d6SSatish Balay     /* read in my part of the matrix numerical values  */
254557b952d6SSatish Balay     nz = procsnz[0];
254657b952d6SSatish Balay     vals = buf;
2547cee3aa6bSSatish Balay     mycols = ibuf;
2548cee3aa6bSSatish Balay     if (size == 1)  nz -= extra_rows;
2549e5638eb3SBarry Smith     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2550cee3aa6bSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2551537820f0SBarry Smith 
255257b952d6SSatish Balay     /* insert into matrix */
255357b952d6SSatish Balay     jj      = rstart*bs;
255457b952d6SSatish Balay     for ( i=0; i<m; i++ ) {
255557b952d6SSatish Balay       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
255657b952d6SSatish Balay       mycols += locrowlens[i];
255757b952d6SSatish Balay       vals   += locrowlens[i];
255857b952d6SSatish Balay       jj++;
255957b952d6SSatish Balay     }
256057b952d6SSatish Balay     /* read in other processors (except the last one) and ship out */
256157b952d6SSatish Balay     for ( i=1; i<size-1; i++ ) {
256257b952d6SSatish Balay       nz   = procsnz[i];
256357b952d6SSatish Balay       vals = buf;
2564e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
2565ca161407SBarry Smith       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
256657b952d6SSatish Balay     }
256757b952d6SSatish Balay     /* the last proc */
256857b952d6SSatish Balay     if ( size != 1 ){
256957b952d6SSatish Balay       nz   = procsnz[i] - extra_rows;
2570cee3aa6bSSatish Balay       vals = buf;
2571e5638eb3SBarry Smith       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
257257b952d6SSatish Balay       for ( i=0; i<extra_rows; i++ ) vals[nz+i] = 1.0;
2573ca161407SBarry Smith       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
257457b952d6SSatish Balay     }
257557b952d6SSatish Balay     PetscFree(procsnz);
2576d64ed03dSBarry Smith   } else {
257757b952d6SSatish Balay     /* receive numeric values */
257857b952d6SSatish Balay     buf = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(buf);
257957b952d6SSatish Balay 
258057b952d6SSatish Balay     /* receive message of values*/
258157b952d6SSatish Balay     vals   = buf;
2582cee3aa6bSSatish Balay     mycols = ibuf;
2583ca161407SBarry Smith     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2584ca161407SBarry Smith     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2585a8c6a408SBarry Smith     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
258657b952d6SSatish Balay 
258757b952d6SSatish Balay     /* insert into matrix */
258857b952d6SSatish Balay     jj      = rstart*bs;
2589cee3aa6bSSatish Balay     for ( i=0; i<m; i++ ) {
259057b952d6SSatish Balay       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
259157b952d6SSatish Balay       mycols += locrowlens[i];
259257b952d6SSatish Balay       vals   += locrowlens[i];
259357b952d6SSatish Balay       jj++;
259457b952d6SSatish Balay     }
259557b952d6SSatish Balay   }
259657b952d6SSatish Balay   PetscFree(locrowlens);
259757b952d6SSatish Balay   PetscFree(buf);
259857b952d6SSatish Balay   PetscFree(ibuf);
259957b952d6SSatish Balay   PetscFree(rowners);
260057b952d6SSatish Balay   PetscFree(dlens);
2601cee3aa6bSSatish Balay   PetscFree(mask);
26026d4a8577SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
26036d4a8577SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
26043a40ed3dSBarry Smith   PetscFunctionReturn(0);
260557b952d6SSatish Balay }
260657b952d6SSatish Balay 
260757b952d6SSatish Balay 
2608133cdb44SSatish Balay 
2609133cdb44SSatish Balay #undef __FUNC__
2610133cdb44SSatish Balay #define __FUNC__ "MatMPIBAIJSetHashTableFactor"
2611133cdb44SSatish Balay /*@
2612133cdb44SSatish Balay    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2613133cdb44SSatish Balay 
2614133cdb44SSatish Balay    Input Parameters:
2615133cdb44SSatish Balay .  mat  - the matrix
2616133cdb44SSatish Balay .  fact - factor
2617133cdb44SSatish Balay 
2618fee21e36SBarry Smith    Collective on Mat
2619fee21e36SBarry Smith 
2620133cdb44SSatish Balay   Notes:
2621133cdb44SSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2622133cdb44SSatish Balay 
2623133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT
2624133cdb44SSatish Balay 
2625133cdb44SSatish Balay .seealso: MatSetOption()
2626133cdb44SSatish Balay @*/
2627133cdb44SSatish Balay int MatMPIBAIJSetHashTableFactor(Mat mat,double fact)
2628133cdb44SSatish Balay {
262925fdafccSSatish Balay   Mat_MPIBAIJ *baij;
2630133cdb44SSatish Balay 
2631133cdb44SSatish Balay   PetscFunctionBegin;
2632133cdb44SSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
263325fdafccSSatish Balay   if (mat->type != MATMPIBAIJ) {
2634133cdb44SSatish Balay       SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2635133cdb44SSatish Balay   }
2636133cdb44SSatish Balay   baij = (Mat_MPIBAIJ*) mat->data;
2637133cdb44SSatish Balay   baij->ht_fact = fact;
2638133cdb44SSatish Balay   PetscFunctionReturn(0);
2639133cdb44SSatish Balay }
2640